Commit 1b383bf9 authored by Pablo Neira Ayuso

Merge tag 'ipvs2-for-v4.3' of https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next

Simon Horman says:

====================
Second Round of IPVS Updates for v4.3

I realise these are a little late in the cycle, so if you would prefer
me to repost them for v4.4 then just let me know.

The updates include:
* A new scheduler from Raducu Deaconu
* Enhanced configurability of the sync daemon from Julian Anastasov
====================
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parents 1afe839e d3328817
...@@ -846,6 +846,17 @@ struct ipvs_master_sync_state { ...@@ -846,6 +846,17 @@ struct ipvs_master_sync_state {
/* How much time to keep dests in trash */ /* How much time to keep dests in trash */
#define IP_VS_DEST_TRASH_PERIOD (120 * HZ) #define IP_VS_DEST_TRASH_PERIOD (120 * HZ)
/* Configuration of one sync daemon; netns_ipvs keeps one copy for the
 * master (mcfg) and one for the backup (bcfg).
 */
struct ipvs_sync_daemon_cfg {
/* multicast group address; .ip is used for AF_INET, .in6 for AF_INET6 */
union nf_inet_addr mcast_group;
/* SyncID: written into outgoing sync messages; a backup with a non-zero
 * syncid ignores messages carrying a different one
 */
int syncid;
/* UDP payload size; sync buffers are allocated at least this large */
u16 sync_maxlen;
/* base multicast port; the thread id is added to it per socket */
u16 mcast_port;
/* address family of mcast_group: AF_INET or AF_INET6 */
u8 mcast_af;
/* multicast TTL applied to the sending socket */
u8 mcast_ttl;
/* multicast interface name */
char mcast_ifn[IP_VS_IFNAME_MAXLEN];
};
/* IPVS in network namespace */ /* IPVS in network namespace */
struct netns_ipvs { struct netns_ipvs {
int gen; /* Generation */ int gen; /* Generation */
...@@ -961,15 +972,10 @@ struct netns_ipvs { ...@@ -961,15 +972,10 @@ struct netns_ipvs {
spinlock_t sync_buff_lock; spinlock_t sync_buff_lock;
struct task_struct **backup_threads; struct task_struct **backup_threads;
int threads_mask; int threads_mask;
int send_mesg_maxlen;
int recv_mesg_maxlen;
volatile int sync_state; volatile int sync_state;
volatile int master_syncid;
volatile int backup_syncid;
struct mutex sync_mutex; struct mutex sync_mutex;
/* multicast interface name */ struct ipvs_sync_daemon_cfg mcfg; /* Master Configuration */
char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; struct ipvs_sync_daemon_cfg bcfg; /* Backup Configuration */
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
/* net name space ptr */ /* net name space ptr */
struct net *net; /* Needed by timer routines */ struct net *net; /* Needed by timer routines */
/* Number of heterogeneous destinations, needed becaus heterogeneous /* Number of heterogeneous destinations, needed becaus heterogeneous
...@@ -1408,7 +1414,8 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) ...@@ -1408,7 +1414,8 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
/* IPVS sync daemon data and function prototypes /* IPVS sync daemon data and function prototypes
* (from ip_vs_sync.c) * (from ip_vs_sync.c)
*/ */
int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid); int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *cfg,
int state);
int stop_sync_thread(struct net *net, int state); int stop_sync_thread(struct net *net, int state);
void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts); void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
......
...@@ -406,6 +406,11 @@ enum { ...@@ -406,6 +406,11 @@ enum {
IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */ IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */
IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */ IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */
IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */ IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */
IPVS_DAEMON_ATTR_SYNC_MAXLEN, /* UDP Payload Size */
IPVS_DAEMON_ATTR_MCAST_GROUP, /* IPv4 Multicast Address */
IPVS_DAEMON_ATTR_MCAST_GROUP6, /* IPv6 Multicast Address */
IPVS_DAEMON_ATTR_MCAST_PORT, /* Multicast Port (base) */
IPVS_DAEMON_ATTR_MCAST_TTL, /* Multicast TTL */
__IPVS_DAEMON_ATTR_MAX, __IPVS_DAEMON_ATTR_MAX,
}; };
......
...@@ -162,6 +162,17 @@ config IP_VS_FO ...@@ -162,6 +162,17 @@ config IP_VS_FO
If you want to compile it in kernel, say Y. To compile it as a If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N. module, choose M here. If unsure, say N.
config IP_VS_OVF
tristate "weighted overflow scheduling"
---help---
The weighted overflow scheduling algorithm directs network
connections to the server with the highest weight that is
currently available and overflows to the next when active
connections exceed the node's weight.
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
config IP_VS_LBLC config IP_VS_LBLC
tristate "locality-based least-connection scheduling" tristate "locality-based least-connection scheduling"
---help--- ---help---
......
...@@ -27,6 +27,7 @@ obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o ...@@ -27,6 +27,7 @@ obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
obj-$(CONFIG_IP_VS_OVF) += ip_vs_ovf.o
obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
......
...@@ -2335,13 +2335,23 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ...@@ -2335,13 +2335,23 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cmd == IP_VS_SO_SET_STOPDAEMON) { cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
mutex_lock(&ipvs->sync_mutex); if (cmd == IP_VS_SO_SET_STARTDAEMON) {
if (cmd == IP_VS_SO_SET_STARTDAEMON) struct ipvs_sync_daemon_cfg cfg;
ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
dm->syncid); memset(&cfg, 0, sizeof(cfg));
else strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
sizeof(cfg.mcast_ifn));
cfg.syncid = dm->syncid;
rtnl_lock();
mutex_lock(&ipvs->sync_mutex);
ret = start_sync_thread(net, &cfg, dm->state);
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
} else {
mutex_lock(&ipvs->sync_mutex);
ret = stop_sync_thread(net, dm->state); ret = stop_sync_thread(net, dm->state);
mutex_unlock(&ipvs->sync_mutex); mutex_unlock(&ipvs->sync_mutex);
}
goto out_dec; goto out_dec;
} }
...@@ -2645,15 +2655,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ...@@ -2645,15 +2655,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
mutex_lock(&ipvs->sync_mutex); mutex_lock(&ipvs->sync_mutex);
if (ipvs->sync_state & IP_VS_STATE_MASTER) { if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER; d[0].state = IP_VS_STATE_MASTER;
strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
sizeof(d[0].mcast_ifn)); sizeof(d[0].mcast_ifn));
d[0].syncid = ipvs->master_syncid; d[0].syncid = ipvs->mcfg.syncid;
} }
if (ipvs->sync_state & IP_VS_STATE_BACKUP) { if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP; d[1].state = IP_VS_STATE_BACKUP;
strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
sizeof(d[1].mcast_ifn)); sizeof(d[1].mcast_ifn));
d[1].syncid = ipvs->backup_syncid; d[1].syncid = ipvs->bcfg.syncid;
} }
if (copy_to_user(user, &d, sizeof(d)) != 0) if (copy_to_user(user, &d, sizeof(d)) != 0)
ret = -EFAULT; ret = -EFAULT;
...@@ -2808,6 +2818,11 @@ static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { ...@@ -2808,6 +2818,11 @@ static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
[IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
.len = IP_VS_IFNAME_MAXLEN }, .len = IP_VS_IFNAME_MAXLEN },
[IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
[IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 },
[IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 },
[IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
[IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 },
[IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 },
}; };
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
...@@ -3266,7 +3281,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, ...@@ -3266,7 +3281,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
} }
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
const char *mcast_ifn, __u32 syncid) struct ipvs_sync_daemon_cfg *c)
{ {
struct nlattr *nl_daemon; struct nlattr *nl_daemon;
...@@ -3275,9 +3290,23 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, ...@@ -3275,9 +3290,23 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
return -EMSGSIZE; return -EMSGSIZE;
if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) || nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid)) nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
goto nla_put_failure; goto nla_put_failure;
#ifdef CONFIG_IP_VS_IPV6
if (c->mcast_af == AF_INET6) {
if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
&c->mcast_group.in6))
goto nla_put_failure;
} else
#endif
if (c->mcast_af == AF_INET &&
nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
c->mcast_group.ip))
goto nla_put_failure;
nla_nest_end(skb, nl_daemon); nla_nest_end(skb, nl_daemon);
return 0; return 0;
...@@ -3288,7 +3317,7 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, ...@@ -3288,7 +3317,7 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
} }
static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
const char *mcast_ifn, __u32 syncid, struct ipvs_sync_daemon_cfg *c,
struct netlink_callback *cb) struct netlink_callback *cb)
{ {
void *hdr; void *hdr;
...@@ -3298,7 +3327,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, ...@@ -3298,7 +3327,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
if (!hdr) if (!hdr)
return -EMSGSIZE; return -EMSGSIZE;
if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) if (ip_vs_genl_fill_daemon(skb, state, c))
goto nla_put_failure; goto nla_put_failure;
genlmsg_end(skb, hdr); genlmsg_end(skb, hdr);
...@@ -3318,8 +3347,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, ...@@ -3318,8 +3347,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
mutex_lock(&ipvs->sync_mutex); mutex_lock(&ipvs->sync_mutex);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
ipvs->master_mcast_ifn, &ipvs->mcfg, cb) < 0)
ipvs->master_syncid, cb) < 0)
goto nla_put_failure; goto nla_put_failure;
cb->args[0] = 1; cb->args[0] = 1;
...@@ -3327,8 +3355,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, ...@@ -3327,8 +3355,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
ipvs->backup_mcast_ifn, &ipvs->bcfg, cb) < 0)
ipvs->backup_syncid, cb) < 0)
goto nla_put_failure; goto nla_put_failure;
cb->args[1] = 1; cb->args[1] = 1;
...@@ -3342,30 +3369,83 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, ...@@ -3342,30 +3369,83 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
{ {
struct netns_ipvs *ipvs = net_ipvs(net);
struct ipvs_sync_daemon_cfg c;
struct nlattr *a;
int ret;
memset(&c, 0, sizeof(c));
if (!(attrs[IPVS_DAEMON_ATTR_STATE] && if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID])) attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL; return -EINVAL;
strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
sizeof(c.mcast_ifn));
c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
if (a)
c.sync_maxlen = nla_get_u16(a);
a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
if (a) {
c.mcast_af = AF_INET;
c.mcast_group.ip = nla_get_in_addr(a);
if (!ipv4_is_multicast(c.mcast_group.ip))
return -EINVAL;
} else {
a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
if (a) {
#ifdef CONFIG_IP_VS_IPV6
int addr_type;
c.mcast_af = AF_INET6;
c.mcast_group.in6 = nla_get_in6_addr(a);
addr_type = ipv6_addr_type(&c.mcast_group.in6);
if (!(addr_type & IPV6_ADDR_MULTICAST))
return -EINVAL;
#else
return -EAFNOSUPPORT;
#endif
}
}
a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
if (a)
c.mcast_port = nla_get_u16(a);
a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
if (a)
c.mcast_ttl = nla_get_u8(a);
/* The synchronization protocol is incompatible with mixed family /* The synchronization protocol is incompatible with mixed family
* services * services
*/ */
if (net_ipvs(net)->mixed_address_family_dests > 0) if (ipvs->mixed_address_family_dests > 0)
return -EINVAL; return -EINVAL;
return start_sync_thread(net, rtnl_lock();
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), mutex_lock(&ipvs->sync_mutex);
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), ret = start_sync_thread(net, &c,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
return ret;
} }
static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
{ {
struct netns_ipvs *ipvs = net_ipvs(net);
int ret;
if (!attrs[IPVS_DAEMON_ATTR_STATE]) if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL; return -EINVAL;
return stop_sync_thread(net, mutex_lock(&ipvs->sync_mutex);
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); ret = stop_sync_thread(net,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
mutex_unlock(&ipvs->sync_mutex);
return ret;
} }
static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
...@@ -3389,7 +3469,7 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) ...@@ -3389,7 +3469,7 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{ {
int ret = 0, cmd; int ret = -EINVAL, cmd;
struct net *net; struct net *net;
struct netns_ipvs *ipvs; struct netns_ipvs *ipvs;
...@@ -3400,22 +3480,19 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) ...@@ -3400,22 +3480,19 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
mutex_lock(&ipvs->sync_mutex);
if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
info->attrs[IPVS_CMD_ATTR_DAEMON], info->attrs[IPVS_CMD_ATTR_DAEMON],
ip_vs_daemon_policy)) { ip_vs_daemon_policy))
ret = -EINVAL;
goto out; goto out;
}
if (cmd == IPVS_CMD_NEW_DAEMON) if (cmd == IPVS_CMD_NEW_DAEMON)
ret = ip_vs_genl_new_daemon(net, daemon_attrs); ret = ip_vs_genl_new_daemon(net, daemon_attrs);
else else
ret = ip_vs_genl_del_daemon(net, daemon_attrs); ret = ip_vs_genl_del_daemon(net, daemon_attrs);
out:
mutex_unlock(&ipvs->sync_mutex);
} }
out:
return ret; return ret;
} }
......
/*
* IPVS: Overflow-Connection Scheduling module
*
* Authors: Raducu Deaconu <rhadoo_io@yahoo.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Scheduler implements "overflow" load balancing according to the number of
* active connections: it keeps all connections on the node with the highest
* weight and overflows to the next node if the number of active connections
* exceeds the node's weight.
* Note that this scheduler might not be suitable for UDP because it only uses
* active connections
*
*/
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <net/ip_vs.h>
/*
 * OVF Connection scheduling
 *
 * Walk the service's destination list and pick the destination with the
 * largest weight among those that are usable.  A destination is skipped
 * when it is flagged IP_VS_DEST_F_OVERLOAD, when its active connection
 * count is above its weight, or when its weight is zero.  On equal
 * weights the destination seen first in the list is kept.
 *
 * @svc: service whose destinations are examined
 * @skb: packet being scheduled (not used by this scheduler)
 * @iph: parsed IP header of the packet (not used by this scheduler)
 *
 * Returns the chosen destination, or NULL (after reporting
 * "no destination available" through ip_vs_scheduler_err()) when no
 * destination qualifies.
 */
static struct ip_vs_dest *
ip_vs_ovf_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		   struct ip_vs_iphdr *iph)
{
	struct ip_vs_dest *cand, *best = NULL;
	int best_weight = 0;

	IP_VS_DBG(6, "ip_vs_ovf_schedule(): Scheduling...\n");

	list_for_each_entry_rcu(cand, &svc->destinations, n_list) {
		int weight = atomic_read(&cand->weight);

		/* overloaded servers are never candidates */
		if (cand->flags & IP_VS_DEST_F_OVERLOAD)
			continue;
		/* already holding more active connections than its weight */
		if (atomic_read(&cand->activeconns) > weight)
			continue;
		/* zero weight means the server is not to be used */
		if (!weight)
			continue;
		/* keep the current pick unless this one is strictly heavier */
		if (best && weight <= best_weight)
			continue;

		best = cand;
		best_weight = weight;
	}

	if (!best) {
		ip_vs_scheduler_err(svc, "no destination available");
		return NULL;
	}

	IP_VS_DBG_BUF(6, "OVF: server %s:%u active %d w %d\n",
		      IP_VS_DBG_ADDR(best->af, &best->addr),
		      ntohs(best->port),
		      atomic_read(&best->activeconns),
		      atomic_read(&best->weight));

	return best;
}
/* Scheduler descriptor for "ovf"; handed to the IPVS core at module init */
static struct ip_vs_scheduler ip_vs_ovf_scheduler = {
	.name		= "ovf",
	.schedule	= ip_vs_ovf_schedule,
	.module		= THIS_MODULE,
	.refcnt		= ATOMIC_INIT(0),
	.n_list		= LIST_HEAD_INIT(ip_vs_ovf_scheduler.n_list),
};
/* Module entry point: register the "ovf" scheduler and pass back the
 * registration result unchanged.
 */
static int __init ip_vs_ovf_init(void)
{
	int err = register_ip_vs_scheduler(&ip_vs_ovf_scheduler);

	return err;
}
/* Module exit point: unregister the "ovf" scheduler. */
static void __exit ip_vs_ovf_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_ovf_scheduler);
/* NOTE(review): presumably waits for RCU readers that may still be
 * running the schedule callback before the module goes away - confirm
 */
synchronize_rcu();
}
module_init(ip_vs_ovf_init);
module_exit(ip_vs_ovf_cleanup);
MODULE_LICENSE("GPL");
...@@ -262,6 +262,11 @@ struct ip_vs_sync_mesg { ...@@ -262,6 +262,11 @@ struct ip_vs_sync_mesg {
/* ip_vs_sync_conn entries start here */ /* ip_vs_sync_conn entries start here */
}; };
/* Socket address storage that can hold either an IPv4 or an IPv6
 * address; filled in for the configured multicast group of a sync daemon.
 */
union ipvs_sockaddr {
struct sockaddr_in in;
struct sockaddr_in6 in6;
};
struct ip_vs_sync_buff { struct ip_vs_sync_buff {
struct list_head list; struct list_head list;
unsigned long firstuse; unsigned long firstuse;
...@@ -320,26 +325,28 @@ sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) ...@@ -320,26 +325,28 @@ sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
* Create a new sync buffer for Version 1 proto. * Create a new sync buffer for Version 1 proto.
*/ */
static inline struct ip_vs_sync_buff * static inline struct ip_vs_sync_buff *
ip_vs_sync_buff_create(struct netns_ipvs *ipvs) ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len)
{ {
struct ip_vs_sync_buff *sb; struct ip_vs_sync_buff *sb;
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL; return NULL;
sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg),
ipvs->mcfg.sync_maxlen);
sb->mesg = kmalloc(len, GFP_ATOMIC);
if (!sb->mesg) { if (!sb->mesg) {
kfree(sb); kfree(sb);
return NULL; return NULL;
} }
sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
sb->mesg->version = SYNC_PROTO_VER; sb->mesg->version = SYNC_PROTO_VER;
sb->mesg->syncid = ipvs->master_syncid; sb->mesg->syncid = ipvs->mcfg.syncid;
sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg)); sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
sb->mesg->nr_conns = 0; sb->mesg->nr_conns = 0;
sb->mesg->spare = 0; sb->mesg->spare = 0;
sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; sb->end = (unsigned char *)sb->mesg + len;
sb->firstuse = jiffies; sb->firstuse = jiffies;
return sb; return sb;
...@@ -402,7 +409,7 @@ select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) ...@@ -402,7 +409,7 @@ select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
* Create a new sync buffer for Version 0 proto. * Create a new sync buffer for Version 0 proto.
*/ */
static inline struct ip_vs_sync_buff * static inline struct ip_vs_sync_buff *
ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len)
{ {
struct ip_vs_sync_buff *sb; struct ip_vs_sync_buff *sb;
struct ip_vs_sync_mesg_v0 *mesg; struct ip_vs_sync_mesg_v0 *mesg;
...@@ -410,17 +417,19 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) ...@@ -410,17 +417,19 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL; return NULL;
sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0),
ipvs->mcfg.sync_maxlen);
sb->mesg = kmalloc(len, GFP_ATOMIC);
if (!sb->mesg) { if (!sb->mesg) {
kfree(sb); kfree(sb);
return NULL; return NULL;
} }
mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
mesg->nr_conns = 0; mesg->nr_conns = 0;
mesg->syncid = ipvs->master_syncid; mesg->syncid = ipvs->mcfg.syncid;
mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0)); mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; sb->end = (unsigned char *)mesg + len;
sb->firstuse = jiffies; sb->firstuse = jiffies;
return sb; return sb;
} }
...@@ -533,7 +542,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, ...@@ -533,7 +542,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
struct ip_vs_sync_buff *buff; struct ip_vs_sync_buff *buff;
struct ipvs_master_sync_state *ms; struct ipvs_master_sync_state *ms;
int id; int id;
int len; unsigned int len;
if (unlikely(cp->af != AF_INET)) if (unlikely(cp->af != AF_INET))
return; return;
...@@ -553,17 +562,19 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, ...@@ -553,17 +562,19 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
id = select_master_thread_id(ipvs, cp); id = select_master_thread_id(ipvs, cp);
ms = &ipvs->ms[id]; ms = &ipvs->ms[id];
buff = ms->sync_buff; buff = ms->sync_buff;
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
if (buff) { if (buff) {
m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
/* Send buffer if it is for v1 */ /* Send buffer if it is for v1 */
if (!m->nr_conns) { if (buff->head + len > buff->end || !m->nr_conns) {
sb_queue_tail(ipvs, ms); sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL; ms->sync_buff = NULL;
buff = NULL; buff = NULL;
} }
} }
if (!buff) { if (!buff) {
buff = ip_vs_sync_buff_create_v0(ipvs); buff = ip_vs_sync_buff_create_v0(ipvs, len);
if (!buff) { if (!buff) {
spin_unlock_bh(&ipvs->sync_buff_lock); spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n"); pr_err("ip_vs_sync_buff_create failed.\n");
...@@ -572,8 +583,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, ...@@ -572,8 +583,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
ms->sync_buff = buff; ms->sync_buff = buff;
} }
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
s = (struct ip_vs_sync_conn_v0 *) buff->head; s = (struct ip_vs_sync_conn_v0 *) buff->head;
...@@ -597,12 +606,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, ...@@ -597,12 +606,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
m->nr_conns++; m->nr_conns++;
m->size = htons(ntohs(m->size) + len); m->size = htons(ntohs(m->size) + len);
buff->head += len; buff->head += len;
/* check if there is a space for next one */
if (buff->head + FULL_CONN_SIZE > buff->end) {
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
}
spin_unlock_bh(&ipvs->sync_buff_lock); spin_unlock_bh(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */ /* synchronize its controller if it has */
...@@ -694,7 +697,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) ...@@ -694,7 +697,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
} }
if (!buff) { if (!buff) {
buff = ip_vs_sync_buff_create(ipvs); buff = ip_vs_sync_buff_create(ipvs, len);
if (!buff) { if (!buff) {
spin_unlock_bh(&ipvs->sync_buff_lock); spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n"); pr_err("ip_vs_sync_buff_create failed.\n");
...@@ -1219,7 +1222,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, ...@@ -1219,7 +1222,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
return; return;
} }
/* SyncID sanity check */ /* SyncID sanity check */
if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) {
IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
return; return;
} }
...@@ -1303,6 +1306,14 @@ static void set_mcast_loop(struct sock *sk, u_char loop) ...@@ -1303,6 +1306,14 @@ static void set_mcast_loop(struct sock *sk, u_char loop)
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
lock_sock(sk); lock_sock(sk);
inet->mc_loop = loop ? 1 : 0; inet->mc_loop = loop ? 1 : 0;
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MULTICAST_LOOP */
np->mc_loop = loop ? 1 : 0;
}
#endif
release_sock(sk); release_sock(sk);
} }
...@@ -1316,6 +1327,33 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl) ...@@ -1316,6 +1327,33 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl)
/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
lock_sock(sk); lock_sock(sk);
inet->mc_ttl = ttl; inet->mc_ttl = ttl;
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MULTICAST_HOPS */
np->mcast_hops = ttl;
}
#endif
release_sock(sk);
}
/* Control fragmentation of messages */
static void set_mcast_pmtudisc(struct sock *sk, int val)
{
struct inet_sock *inet = inet_sk(sk);
/* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */
lock_sock(sk);
inet->pmtudisc = val;
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MTU_DISCOVER */
np->pmtudisc = val;
}
#endif
release_sock(sk); release_sock(sk);
} }
...@@ -1338,44 +1376,15 @@ static int set_mcast_if(struct sock *sk, char *ifname) ...@@ -1338,44 +1376,15 @@ static int set_mcast_if(struct sock *sk, char *ifname)
lock_sock(sk); lock_sock(sk);
inet->mc_index = dev->ifindex; inet->mc_index = dev->ifindex;
/* inet->mc_addr = 0; */ /* inet->mc_addr = 0; */
release_sock(sk); #ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
return 0; struct ipv6_pinfo *np = inet6_sk(sk);
}
/* /* IPV6_MULTICAST_IF */
* Set the maximum length of sync message according to the np->mcast_oif = dev->ifindex;
* specified interface's MTU.
*/
static int set_sync_mesg_maxlen(struct net *net, int sync_state)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct net_device *dev;
int num;
if (sync_state == IP_VS_STATE_MASTER) {
dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
if (!dev)
return -ENODEV;
num = (dev->mtu - sizeof(struct iphdr) -
sizeof(struct udphdr) -
SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
IP_VS_DBG(7, "setting the maximum length of sync sending "
"message %d.\n", ipvs->send_mesg_maxlen);
} else if (sync_state == IP_VS_STATE_BACKUP) {
dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
if (!dev)
return -ENODEV;
ipvs->recv_mesg_maxlen = dev->mtu -
sizeof(struct iphdr) - sizeof(struct udphdr);
IP_VS_DBG(7, "setting the maximum length of sync receiving "
"message %d.\n", ipvs->recv_mesg_maxlen);
} }
#endif
release_sock(sk);
return 0; return 0;
} }
...@@ -1405,15 +1414,34 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) ...@@ -1405,15 +1414,34 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
mreq.imr_ifindex = dev->ifindex; mreq.imr_ifindex = dev->ifindex;
rtnl_lock();
lock_sock(sk); lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq); ret = ip_mc_join_group(sk, &mreq);
release_sock(sk); release_sock(sk);
rtnl_unlock();
return ret; return ret;
} }
#ifdef CONFIG_IP_VS_IPV6
/*
 * Join the IPv6 multicast group @addr on the interface named @ifname.
 *
 * The interface is looked up in the socket's network namespace.  If the
 * socket is already bound to a device, it must be the same interface.
 * The socket lock is held around the join itself.
 * NOTE(review): the lookup uses __dev_get_by_name() and no rtnl_lock is
 * taken here, so callers are presumably expected to hold RTNL - confirm.
 *
 * Returns -ENODEV when the interface does not exist, -EINVAL when the
 * socket is bound to a different device, otherwise whatever
 * ipv6_sock_mc_join() returns.
 */
static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
			     char *ifname)
{
	struct net_device *ifdev = __dev_get_by_name(sock_net(sk), ifname);
	int err;

	if (!ifdev)
		return -ENODEV;

	if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != ifdev->ifindex)
		return -EINVAL;

	lock_sock(sk);
	err = ipv6_sock_mc_join(sk, ifdev->ifindex, addr);
	release_sock(sk);

	return err;
}
#endif
static int bind_mcastif_addr(struct socket *sock, char *ifname) static int bind_mcastif_addr(struct socket *sock, char *ifname)
{ {
...@@ -1442,6 +1470,26 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) ...@@ -1442,6 +1470,26 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
} }
static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
struct ipvs_sync_daemon_cfg *c, int id)
{
if (AF_INET6 == c->mcast_af) {
sa->in6 = (struct sockaddr_in6) {
.sin6_family = AF_INET6,
.sin6_port = htons(c->mcast_port + id),
};
sa->in6.sin6_addr = c->mcast_group.in6;
*salen = sizeof(sa->in6);
} else {
sa->in = (struct sockaddr_in) {
.sin_family = AF_INET,
.sin_port = htons(c->mcast_port + id),
};
sa->in.sin_addr = c->mcast_group.in;
*salen = sizeof(sa->in);
}
}
/* /*
* Set up sending multicast socket over UDP * Set up sending multicast socket over UDP
*/ */
...@@ -1449,40 +1497,43 @@ static struct socket *make_send_sock(struct net *net, int id) ...@@ -1449,40 +1497,43 @@ static struct socket *make_send_sock(struct net *net, int id)
{ {
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */ /* multicast addr */
struct sockaddr_in mcast_addr = { union ipvs_sockaddr mcast_addr;
.sin_family = AF_INET,
.sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
};
struct socket *sock; struct socket *sock;
int result; int result, salen;
/* First create a socket */ /* First create a socket */
result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
IPPROTO_UDP, &sock);
if (result < 0) { if (result < 0) {
pr_err("Error during creation of socket; terminating\n"); pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result); return ERR_PTR(result);
} }
result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
if (result < 0) { if (result < 0) {
pr_err("Error setting outbound mcast interface\n"); pr_err("Error setting outbound mcast interface\n");
goto error; goto error;
} }
set_mcast_loop(sock->sk, 0); set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1); set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl);
/* Allow fragmentation if MTU changes */
set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT);
result = sysctl_sync_sock_size(ipvs); result = sysctl_sync_sock_size(ipvs);
if (result > 0) if (result > 0)
set_sock_size(sock->sk, 1, result); set_sock_size(sock->sk, 1, result);
result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); if (AF_INET == ipvs->mcfg.mcast_af)
result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
else
result = 0;
if (result < 0) { if (result < 0) {
pr_err("Error binding address of the mcast interface\n"); pr_err("Error binding address of the mcast interface\n");
goto error; goto error;
} }
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
sizeof(struct sockaddr), 0); salen, 0);
if (result < 0) { if (result < 0) {
pr_err("Error connecting to the multicast addr\n"); pr_err("Error connecting to the multicast addr\n");
goto error; goto error;
...@@ -1503,16 +1554,13 @@ static struct socket *make_receive_sock(struct net *net, int id) ...@@ -1503,16 +1554,13 @@ static struct socket *make_receive_sock(struct net *net, int id)
{ {
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */ /* multicast addr */
struct sockaddr_in mcast_addr = { union ipvs_sockaddr mcast_addr;
.sin_family = AF_INET,
.sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
};
struct socket *sock; struct socket *sock;
int result; int result, salen;
/* First create a socket */ /* First create a socket */
result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
IPPROTO_UDP, &sock);
if (result < 0) { if (result < 0) {
pr_err("Error during creation of socket; terminating\n"); pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result); return ERR_PTR(result);
...@@ -1523,17 +1571,22 @@ static struct socket *make_receive_sock(struct net *net, int id) ...@@ -1523,17 +1571,22 @@ static struct socket *make_receive_sock(struct net *net, int id)
if (result > 0) if (result > 0)
set_sock_size(sock->sk, 0, result); set_sock_size(sock->sk, 0, result);
result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
sizeof(struct sockaddr)); result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
if (result < 0) { if (result < 0) {
pr_err("Error binding to the multicast addr\n"); pr_err("Error binding to the multicast addr\n");
goto error; goto error;
} }
/* join the multicast group */ /* join the multicast group */
result = join_mcast_group(sock->sk, #ifdef CONFIG_IP_VS_IPV6
(struct in_addr *) &mcast_addr.sin_addr, if (ipvs->bcfg.mcast_af == AF_INET6)
ipvs->backup_mcast_ifn); result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
ipvs->bcfg.mcast_ifn);
else
#endif
result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
ipvs->bcfg.mcast_ifn);
if (result < 0) { if (result < 0) {
pr_err("Error joining to the multicast group\n"); pr_err("Error joining to the multicast group\n");
goto error; goto error;
...@@ -1641,7 +1694,7 @@ static int sync_thread_master(void *data) ...@@ -1641,7 +1694,7 @@ static int sync_thread_master(void *data)
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d, id = %d\n", "syncid = %d, id = %d\n",
ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id); ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id);
for (;;) { for (;;) {
sb = next_sync_buff(ipvs, ms); sb = next_sync_buff(ipvs, ms);
...@@ -1695,7 +1748,7 @@ static int sync_thread_backup(void *data) ...@@ -1695,7 +1748,7 @@ static int sync_thread_backup(void *data)
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d, id = %d\n", "syncid = %d, id = %d\n",
ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id); ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);
while (!kthread_should_stop()) { while (!kthread_should_stop()) {
wait_event_interruptible(*sk_sleep(tinfo->sock->sk), wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
...@@ -1705,7 +1758,7 @@ static int sync_thread_backup(void *data) ...@@ -1705,7 +1758,7 @@ static int sync_thread_backup(void *data)
/* do we have data now? */ /* do we have data now? */
while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
len = ip_vs_receive(tinfo->sock, tinfo->buf, len = ip_vs_receive(tinfo->sock, tinfo->buf,
ipvs->recv_mesg_maxlen); ipvs->bcfg.sync_maxlen);
if (len <= 0) { if (len <= 0) {
if (len != -EAGAIN) if (len != -EAGAIN)
pr_err("receiving message error\n"); pr_err("receiving message error\n");
...@@ -1725,16 +1778,19 @@ static int sync_thread_backup(void *data) ...@@ -1725,16 +1778,19 @@ static int sync_thread_backup(void *data)
} }
int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
int state)
{ {
struct ip_vs_sync_thread_data *tinfo; struct ip_vs_sync_thread_data *tinfo;
struct task_struct **array = NULL, *task; struct task_struct **array = NULL, *task;
struct socket *sock; struct socket *sock;
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
struct net_device *dev;
char *name; char *name;
int (*threadfn)(void *data); int (*threadfn)(void *data);
int id, count; int id, count, hlen;
int result = -ENOMEM; int result = -ENOMEM;
u16 mtu, min_mtu;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
...@@ -1746,22 +1802,46 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) ...@@ -1746,22 +1802,46 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
} else } else
count = ipvs->threads_mask + 1; count = ipvs->threads_mask + 1;
if (c->mcast_af == AF_UNSPEC) {
c->mcast_af = AF_INET;
c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP);
}
if (!c->mcast_port)
c->mcast_port = IP_VS_SYNC_PORT;
if (!c->mcast_ttl)
c->mcast_ttl = 1;
dev = __dev_get_by_name(net, c->mcast_ifn);
if (!dev) {
pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
return -ENODEV;
}
hlen = (AF_INET6 == c->mcast_af) ?
sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
sizeof(struct iphdr) + sizeof(struct udphdr);
mtu = (state == IP_VS_STATE_BACKUP) ?
clamp(dev->mtu, 1500U, 65535U) : 1500U;
min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1;
if (c->sync_maxlen)
c->sync_maxlen = clamp_t(unsigned int,
c->sync_maxlen, min_mtu,
65535 - hlen);
else
c->sync_maxlen = mtu - hlen;
if (state == IP_VS_STATE_MASTER) { if (state == IP_VS_STATE_MASTER) {
if (ipvs->ms) if (ipvs->ms)
return -EEXIST; return -EEXIST;
strlcpy(ipvs->master_mcast_ifn, mcast_ifn, ipvs->mcfg = *c;
sizeof(ipvs->master_mcast_ifn));
ipvs->master_syncid = syncid;
name = "ipvs-m:%d:%d"; name = "ipvs-m:%d:%d";
threadfn = sync_thread_master; threadfn = sync_thread_master;
} else if (state == IP_VS_STATE_BACKUP) { } else if (state == IP_VS_STATE_BACKUP) {
if (ipvs->backup_threads) if (ipvs->backup_threads)
return -EEXIST; return -EEXIST;
strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, ipvs->bcfg = *c;
sizeof(ipvs->backup_mcast_ifn));
ipvs->backup_syncid = syncid;
name = "ipvs-b:%d:%d"; name = "ipvs-b:%d:%d";
threadfn = sync_thread_backup; threadfn = sync_thread_backup;
} else { } else {
...@@ -1789,7 +1869,6 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) ...@@ -1789,7 +1869,6 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
if (!array) if (!array)
goto out; goto out;
} }
set_sync_mesg_maxlen(net, state);
tinfo = NULL; tinfo = NULL;
for (id = 0; id < count; id++) { for (id = 0; id < count; id++) {
...@@ -1807,7 +1886,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) ...@@ -1807,7 +1886,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
tinfo->net = net; tinfo->net = net;
tinfo->sock = sock; tinfo->sock = sock;
if (state == IP_VS_STATE_BACKUP) { if (state == IP_VS_STATE_BACKUP) {
tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen, tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
GFP_KERNEL); GFP_KERNEL);
if (!tinfo->buf) if (!tinfo->buf)
goto outtinfo; goto outtinfo;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment