Commit 581a5f2a authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter/IPVS updates for your net-next tree.
In sum, these are patches that address fallout from the previous round, plus
updates from the IPVS folks via Simon Horman. They are:

1) Add a new scheduler to IPVS: The weighted overflow scheduling algorithm
   directs network connections to the server with the highest weight that is
   currently available and overflows to the next when active connections exceed
   the node's weight. From Raducu Deaconu.

2) Fix lock ordering in IPVS: always take rtnl_lock first. Patch from Julian
   Anastasov.

3) Allow specifying the MTU for the IPVS in-kernel state sync daemon. From
   Julian Anastasov.

4) Enhance multicast configuration for the IPVS state sync daemon. Also from
   Julian.

5) Resolve sparse warnings in the nf_dup modules.

6) Fix a linking problem when CONFIG_NF_DUP_IPV6 is not set.

7) Add ICMP codes 5 and 6 to IPv6 REJECT target, they are more informative
   subsets of code 1. From Andreas Herz.

8) Revert the jumpstack size calculation from mark_source_chains due to chain
   depth miscalculations, from Florian Westphal.

9) Calm down more sparse warnings around the Netfilter tree, again from Florian
   Westphal.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents cc7acad1 851345c5
...@@ -846,6 +846,17 @@ struct ipvs_master_sync_state { ...@@ -846,6 +846,17 @@ struct ipvs_master_sync_state {
/* How much time to keep dests in trash */ /* How much time to keep dests in trash */
#define IP_VS_DEST_TRASH_PERIOD (120 * HZ) #define IP_VS_DEST_TRASH_PERIOD (120 * HZ)
/* Settings for one IPVS sync daemon. struct netns_ipvs keeps two of these:
 * one for the master daemon and one for the backup daemon.
 */
struct ipvs_sync_daemon_cfg {
/* multicast group; .ip is used for AF_INET, .in6 for AF_INET6 */
union nf_inet_addr mcast_group;
/* SyncID the daemon belongs to */
int syncid;
/* UDP payload size (IPVS_DAEMON_ATTR_SYNC_MAXLEN) */
u16 sync_maxlen;
/* multicast port (base) */
u16 mcast_port;
/* address family of mcast_group: AF_INET or AF_INET6 when a group was given */
u8 mcast_af;
/* multicast TTL */
u8 mcast_ttl;
/* multicast interface name */
char mcast_ifn[IP_VS_IFNAME_MAXLEN];
};
/* IPVS in network namespace */ /* IPVS in network namespace */
struct netns_ipvs { struct netns_ipvs {
int gen; /* Generation */ int gen; /* Generation */
...@@ -961,15 +972,10 @@ struct netns_ipvs { ...@@ -961,15 +972,10 @@ struct netns_ipvs {
spinlock_t sync_buff_lock; spinlock_t sync_buff_lock;
struct task_struct **backup_threads; struct task_struct **backup_threads;
int threads_mask; int threads_mask;
int send_mesg_maxlen;
int recv_mesg_maxlen;
volatile int sync_state; volatile int sync_state;
volatile int master_syncid;
volatile int backup_syncid;
struct mutex sync_mutex; struct mutex sync_mutex;
/* multicast interface name */ struct ipvs_sync_daemon_cfg mcfg; /* Master Configuration */
char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; struct ipvs_sync_daemon_cfg bcfg; /* Backup Configuration */
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
/* net name space ptr */ /* net name space ptr */
struct net *net; /* Needed by timer routines */ struct net *net; /* Needed by timer routines */
/* Number of heterogeneous destinations, needed becaus heterogeneous /* Number of heterogeneous destinations, needed becaus heterogeneous
...@@ -1408,7 +1414,8 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) ...@@ -1408,7 +1414,8 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
/* IPVS sync daemon data and function prototypes /* IPVS sync daemon data and function prototypes
* (from ip_vs_sync.c) * (from ip_vs_sync.c)
*/ */
int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid); int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *cfg,
int state);
int stop_sync_thread(struct net *net, int state); int stop_sync_thread(struct net *net, int state);
void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts); void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
......
...@@ -406,6 +406,11 @@ enum { ...@@ -406,6 +406,11 @@ enum {
IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */ IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */
IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */ IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */
IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */ IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */
IPVS_DAEMON_ATTR_SYNC_MAXLEN, /* UDP Payload Size */
IPVS_DAEMON_ATTR_MCAST_GROUP, /* IPv4 Multicast Address */
IPVS_DAEMON_ATTR_MCAST_GROUP6, /* IPv6 Multicast Address */
IPVS_DAEMON_ATTR_MCAST_PORT, /* Multicast Port (base) */
IPVS_DAEMON_ATTR_MCAST_TTL, /* Multicast TTL */
__IPVS_DAEMON_ATTR_MAX, __IPVS_DAEMON_ATTR_MAX,
}; };
......
...@@ -10,7 +10,9 @@ enum ip6t_reject_with { ...@@ -10,7 +10,9 @@ enum ip6t_reject_with {
IP6T_ICMP6_ADDR_UNREACH, IP6T_ICMP6_ADDR_UNREACH,
IP6T_ICMP6_PORT_UNREACH, IP6T_ICMP6_PORT_UNREACH,
IP6T_ICMP6_ECHOREPLY, IP6T_ICMP6_ECHOREPLY,
IP6T_TCP_RESET IP6T_TCP_RESET,
IP6T_ICMP6_POLICY_FAIL,
IP6T_ICMP6_REJECT_ROUTE
}; };
struct ip6t_reject_info { struct ip6t_reject_info {
......
...@@ -176,7 +176,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ...@@ -176,7 +176,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
return 0; return 0;
} }
static inline __pure static inline
struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
{ {
return (void *)entry + entry->next_offset; return (void *)entry + entry->next_offset;
......
...@@ -367,13 +367,10 @@ static inline bool unconditional(const struct arpt_arp *arp) ...@@ -367,13 +367,10 @@ static inline bool unconditional(const struct arpt_arp *arp)
/* Figures out from what hook each rule can be called: returns 0 if /* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom. * there are loops. Puts hook bitmask in comefrom.
*
* Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/ */
static int mark_source_chains(struct xt_table_info *newinfo, static int mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0) unsigned int valid_hooks, void *entry0)
{ {
unsigned int calldepth, max_calldepth = 0;
unsigned int hook; unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset /* No recursion; use packet counter to save back ptrs (reset
...@@ -389,7 +386,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, ...@@ -389,7 +386,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
/* Set initial back pointer. */ /* Set initial back pointer. */
e->counters.pcnt = pos; e->counters.pcnt = pos;
calldepth = 0;
for (;;) { for (;;) {
const struct xt_standard_target *t const struct xt_standard_target *t
...@@ -444,8 +440,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, ...@@ -444,8 +440,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
(entry0 + pos + size); (entry0 + pos + size);
e->counters.pcnt = pos; e->counters.pcnt = pos;
pos += size; pos += size;
if (calldepth > 0)
--calldepth;
} else { } else {
int newpos = t->verdict; int newpos = t->verdict;
...@@ -460,10 +454,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, ...@@ -460,10 +454,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
return 0; return 0;
} }
if (entry0 + newpos != arpt_next_entry(e) &&
++calldepth > max_calldepth)
max_calldepth = calldepth;
/* This a jump; chase it. */ /* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n", duprintf("Jump rule %u -> %u\n",
pos, newpos); pos, newpos);
...@@ -480,7 +470,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, ...@@ -480,7 +470,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
next: next:
duprintf("Finished chain %u\n", hook); duprintf("Finished chain %u\n", hook);
} }
newinfo->stacksize = max_calldepth;
return 1; return 1;
} }
...@@ -670,6 +659,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, ...@@ -670,6 +659,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (ret != 0) if (ret != 0)
break; break;
++i; ++i;
if (strcmp(arpt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0) if (ret != 0)
...@@ -1442,6 +1434,9 @@ static int translate_compat_table(const char *name, ...@@ -1442,6 +1434,9 @@ static int translate_compat_table(const char *name,
break; break;
} }
++i; ++i;
if (strcmp(arpt_get_target(iter1)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (ret) { if (ret) {
/* /*
......
...@@ -443,15 +443,11 @@ ipt_do_table(struct sk_buff *skb, ...@@ -443,15 +443,11 @@ ipt_do_table(struct sk_buff *skb,
} }
/* Figures out from what hook each rule can be called: returns 0 if /* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom. there are loops. Puts hook bitmask in comefrom. */
*
* Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/
static int static int
mark_source_chains(struct xt_table_info *newinfo, mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0) unsigned int valid_hooks, void *entry0)
{ {
unsigned int calldepth, max_calldepth = 0;
unsigned int hook; unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset /* No recursion; use packet counter to save back ptrs (reset
...@@ -465,7 +461,6 @@ mark_source_chains(struct xt_table_info *newinfo, ...@@ -465,7 +461,6 @@ mark_source_chains(struct xt_table_info *newinfo,
/* Set initial back pointer. */ /* Set initial back pointer. */
e->counters.pcnt = pos; e->counters.pcnt = pos;
calldepth = 0;
for (;;) { for (;;) {
const struct xt_standard_target *t const struct xt_standard_target *t
...@@ -527,9 +522,6 @@ mark_source_chains(struct xt_table_info *newinfo, ...@@ -527,9 +522,6 @@ mark_source_chains(struct xt_table_info *newinfo,
(entry0 + pos + size); (entry0 + pos + size);
e->counters.pcnt = pos; e->counters.pcnt = pos;
pos += size; pos += size;
WARN_ON_ONCE(calldepth == 0);
if (calldepth > 0)
--calldepth;
} else { } else {
int newpos = t->verdict; int newpos = t->verdict;
...@@ -543,14 +535,9 @@ mark_source_chains(struct xt_table_info *newinfo, ...@@ -543,14 +535,9 @@ mark_source_chains(struct xt_table_info *newinfo,
newpos); newpos);
return 0; return 0;
} }
if (entry0 + newpos != ipt_next_entry(e) &&
!(e->ip.flags & IPT_F_GOTO) &&
++calldepth > max_calldepth)
max_calldepth = calldepth;
/* This a jump; chase it. */ /* This a jump; chase it. */
duprintf("Jump rule %u -> %u, calldepth %d\n", duprintf("Jump rule %u -> %u\n",
pos, newpos, calldepth); pos, newpos);
} else { } else {
/* ... this is a fallthru */ /* ... this is a fallthru */
newpos = pos + e->next_offset; newpos = pos + e->next_offset;
...@@ -564,7 +551,6 @@ mark_source_chains(struct xt_table_info *newinfo, ...@@ -564,7 +551,6 @@ mark_source_chains(struct xt_table_info *newinfo,
next: next:
duprintf("Finished chain %u\n", hook); duprintf("Finished chain %u\n", hook);
} }
newinfo->stacksize = max_calldepth;
return 1; return 1;
} }
...@@ -844,6 +830,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ...@@ -844,6 +830,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0) if (ret != 0)
return ret; return ret;
++i; ++i;
if (strcmp(ipt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (i != repl->num_entries) { if (i != repl->num_entries) {
...@@ -1759,6 +1748,9 @@ translate_compat_table(struct net *net, ...@@ -1759,6 +1748,9 @@ translate_compat_table(struct net *net,
if (ret != 0) if (ret != 0)
break; break;
++i; ++i;
if (strcmp(ipt_get_target(iter1)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (ret) { if (ret) {
/* /*
......
...@@ -26,7 +26,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, ...@@ -26,7 +26,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
{ {
struct nft_dup_ipv4 *priv = nft_expr_priv(expr); struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
struct in_addr gw = { struct in_addr gw = {
.s_addr = regs->data[priv->sreg_addr], .s_addr = (__force __be32)regs->data[priv->sreg_addr],
}; };
int oif = regs->data[priv->sreg_dev]; int oif = regs->data[priv->sreg_dev];
......
...@@ -455,15 +455,11 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -455,15 +455,11 @@ ip6t_do_table(struct sk_buff *skb,
} }
/* Figures out from what hook each rule can be called: returns 0 if /* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom. there are loops. Puts hook bitmask in comefrom. */
*
* Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/
static int static int
mark_source_chains(struct xt_table_info *newinfo, mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0) unsigned int valid_hooks, void *entry0)
{ {
unsigned int calldepth, max_calldepth = 0;
unsigned int hook; unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset /* No recursion; use packet counter to save back ptrs (reset
...@@ -477,7 +473,6 @@ mark_source_chains(struct xt_table_info *newinfo, ...@@ -477,7 +473,6 @@ mark_source_chains(struct xt_table_info *newinfo,
/* Set initial back pointer. */ /* Set initial back pointer. */
e->counters.pcnt = pos; e->counters.pcnt = pos;
calldepth = 0;
for (;;) { for (;;) {
const struct xt_standard_target *t const struct xt_standard_target *t
...@@ -539,8 +534,6 @@ mark_source_chains(struct xt_table_info *newinfo, ...@@ -539,8 +534,6 @@ mark_source_chains(struct xt_table_info *newinfo,
(entry0 + pos + size); (entry0 + pos + size);
e->counters.pcnt = pos; e->counters.pcnt = pos;
pos += size; pos += size;
if (calldepth > 0)
--calldepth;
} else { } else {
int newpos = t->verdict; int newpos = t->verdict;
...@@ -554,11 +547,6 @@ mark_source_chains(struct xt_table_info *newinfo, ...@@ -554,11 +547,6 @@ mark_source_chains(struct xt_table_info *newinfo,
newpos); newpos);
return 0; return 0;
} }
if (entry0 + newpos != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO) &&
++calldepth > max_calldepth)
max_calldepth = calldepth;
/* This a jump; chase it. */ /* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n", duprintf("Jump rule %u -> %u\n",
pos, newpos); pos, newpos);
...@@ -575,7 +563,6 @@ mark_source_chains(struct xt_table_info *newinfo, ...@@ -575,7 +563,6 @@ mark_source_chains(struct xt_table_info *newinfo,
next: next:
duprintf("Finished chain %u\n", hook); duprintf("Finished chain %u\n", hook);
} }
newinfo->stacksize = max_calldepth;
return 1; return 1;
} }
...@@ -855,6 +842,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ...@@ -855,6 +842,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0) if (ret != 0)
return ret; return ret;
++i; ++i;
if (strcmp(ip6t_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (i != repl->num_entries) { if (i != repl->num_entries) {
...@@ -1767,6 +1757,9 @@ translate_compat_table(struct net *net, ...@@ -1767,6 +1757,9 @@ translate_compat_table(struct net *net,
if (ret != 0) if (ret != 0)
break; break;
++i; ++i;
if (strcmp(ip6t_get_target(iter1)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (ret) { if (ret) {
/* /*
......
...@@ -63,6 +63,12 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -63,6 +63,12 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
case IP6T_TCP_RESET: case IP6T_TCP_RESET:
nf_send_reset6(net, skb, par->hooknum); nf_send_reset6(net, skb, par->hooknum);
break; break;
case IP6T_ICMP6_POLICY_FAIL:
nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum);
break;
case IP6T_ICMP6_REJECT_ROUTE:
nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum);
break;
} }
return NF_DROP; return NF_DROP;
......
...@@ -237,7 +237,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, ...@@ -237,7 +237,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
nth->ack_seq = th->ack_seq; nth->ack_seq = th->ack_seq;
tcp_flag_word(nth) = TCP_FLAG_ACK; tcp_flag_word(nth) = TCP_FLAG_ACK;
nth->doff = tcp_hdr_size / 4; nth->doff = tcp_hdr_size / 4;
nth->window = ntohs(htons(th->window) >> opts->wscale); nth->window = htons(ntohs(th->window) >> opts->wscale);
nth->check = 0; nth->check = 0;
nth->urg_ptr = 0; nth->urg_ptr = 0;
......
...@@ -45,8 +45,8 @@ static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw, ...@@ -45,8 +45,8 @@ static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw,
fl6.flowi6_oif = oif; fl6.flowi6_oif = oif;
fl6.daddr = *gw; fl6.daddr = *gw;
fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) |
(iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]);
dst = ip6_route_output(net, NULL, &fl6); dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) { if (dst->error) {
dst_release(dst); dst_release(dst);
......
...@@ -388,9 +388,6 @@ EXPORT_SYMBOL(nf_conntrack_destroy); ...@@ -388,9 +388,6 @@ EXPORT_SYMBOL(nf_conntrack_destroy);
struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly; struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfq_ct_hook); EXPORT_SYMBOL_GPL(nfq_ct_hook);
struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfq_ct_nat_hook);
#endif /* CONFIG_NF_CONNTRACK */ #endif /* CONFIG_NF_CONNTRACK */
#ifdef CONFIG_NF_NAT_NEEDED #ifdef CONFIG_NF_NAT_NEEDED
......
...@@ -162,6 +162,17 @@ config IP_VS_FO ...@@ -162,6 +162,17 @@ config IP_VS_FO
If you want to compile it in kernel, say Y. To compile it as a If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N. module, choose M here. If unsure, say N.
config IP_VS_OVF
tristate "weighted overflow scheduling"
---help---
The weighted overflow scheduling algorithm directs network
connections to the server with the highest weight that is
currently available and overflows to the next when active
connections exceed the node's weight.
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
config IP_VS_LBLC config IP_VS_LBLC
tristate "locality-based least-connection scheduling" tristate "locality-based least-connection scheduling"
---help--- ---help---
......
...@@ -27,6 +27,7 @@ obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o ...@@ -27,6 +27,7 @@ obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
obj-$(CONFIG_IP_VS_OVF) += ip_vs_ovf.o
obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
......
...@@ -2335,13 +2335,23 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ...@@ -2335,13 +2335,23 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cmd == IP_VS_SO_SET_STOPDAEMON) { cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
mutex_lock(&ipvs->sync_mutex); if (cmd == IP_VS_SO_SET_STARTDAEMON) {
if (cmd == IP_VS_SO_SET_STARTDAEMON) struct ipvs_sync_daemon_cfg cfg;
ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
dm->syncid); memset(&cfg, 0, sizeof(cfg));
else strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
sizeof(cfg.mcast_ifn));
cfg.syncid = dm->syncid;
rtnl_lock();
mutex_lock(&ipvs->sync_mutex);
ret = start_sync_thread(net, &cfg, dm->state);
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
} else {
mutex_lock(&ipvs->sync_mutex);
ret = stop_sync_thread(net, dm->state); ret = stop_sync_thread(net, dm->state);
mutex_unlock(&ipvs->sync_mutex); mutex_unlock(&ipvs->sync_mutex);
}
goto out_dec; goto out_dec;
} }
...@@ -2645,15 +2655,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ...@@ -2645,15 +2655,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
mutex_lock(&ipvs->sync_mutex); mutex_lock(&ipvs->sync_mutex);
if (ipvs->sync_state & IP_VS_STATE_MASTER) { if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER; d[0].state = IP_VS_STATE_MASTER;
strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
sizeof(d[0].mcast_ifn)); sizeof(d[0].mcast_ifn));
d[0].syncid = ipvs->master_syncid; d[0].syncid = ipvs->mcfg.syncid;
} }
if (ipvs->sync_state & IP_VS_STATE_BACKUP) { if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP; d[1].state = IP_VS_STATE_BACKUP;
strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
sizeof(d[1].mcast_ifn)); sizeof(d[1].mcast_ifn));
d[1].syncid = ipvs->backup_syncid; d[1].syncid = ipvs->bcfg.syncid;
} }
if (copy_to_user(user, &d, sizeof(d)) != 0) if (copy_to_user(user, &d, sizeof(d)) != 0)
ret = -EFAULT; ret = -EFAULT;
...@@ -2808,6 +2818,11 @@ static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { ...@@ -2808,6 +2818,11 @@ static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
[IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
.len = IP_VS_IFNAME_MAXLEN }, .len = IP_VS_IFNAME_MAXLEN },
[IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
[IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 },
[IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 },
[IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
[IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 },
[IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 },
}; };
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
...@@ -3266,7 +3281,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, ...@@ -3266,7 +3281,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
} }
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
const char *mcast_ifn, __u32 syncid) struct ipvs_sync_daemon_cfg *c)
{ {
struct nlattr *nl_daemon; struct nlattr *nl_daemon;
...@@ -3275,9 +3290,23 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, ...@@ -3275,9 +3290,23 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
return -EMSGSIZE; return -EMSGSIZE;
if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) || nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid)) nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
goto nla_put_failure; goto nla_put_failure;
#ifdef CONFIG_IP_VS_IPV6
if (c->mcast_af == AF_INET6) {
if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
&c->mcast_group.in6))
goto nla_put_failure;
} else
#endif
if (c->mcast_af == AF_INET &&
nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
c->mcast_group.ip))
goto nla_put_failure;
nla_nest_end(skb, nl_daemon); nla_nest_end(skb, nl_daemon);
return 0; return 0;
...@@ -3288,7 +3317,7 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, ...@@ -3288,7 +3317,7 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
} }
static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
const char *mcast_ifn, __u32 syncid, struct ipvs_sync_daemon_cfg *c,
struct netlink_callback *cb) struct netlink_callback *cb)
{ {
void *hdr; void *hdr;
...@@ -3298,7 +3327,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, ...@@ -3298,7 +3327,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
if (!hdr) if (!hdr)
return -EMSGSIZE; return -EMSGSIZE;
if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) if (ip_vs_genl_fill_daemon(skb, state, c))
goto nla_put_failure; goto nla_put_failure;
genlmsg_end(skb, hdr); genlmsg_end(skb, hdr);
...@@ -3318,8 +3347,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, ...@@ -3318,8 +3347,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
mutex_lock(&ipvs->sync_mutex); mutex_lock(&ipvs->sync_mutex);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
ipvs->master_mcast_ifn, &ipvs->mcfg, cb) < 0)
ipvs->master_syncid, cb) < 0)
goto nla_put_failure; goto nla_put_failure;
cb->args[0] = 1; cb->args[0] = 1;
...@@ -3327,8 +3355,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, ...@@ -3327,8 +3355,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
ipvs->backup_mcast_ifn, &ipvs->bcfg, cb) < 0)
ipvs->backup_syncid, cb) < 0)
goto nla_put_failure; goto nla_put_failure;
cb->args[1] = 1; cb->args[1] = 1;
...@@ -3342,30 +3369,83 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, ...@@ -3342,30 +3369,83 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
{ {
struct netns_ipvs *ipvs = net_ipvs(net);
struct ipvs_sync_daemon_cfg c;
struct nlattr *a;
int ret;
memset(&c, 0, sizeof(c));
if (!(attrs[IPVS_DAEMON_ATTR_STATE] && if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID])) attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL; return -EINVAL;
strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
sizeof(c.mcast_ifn));
c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
if (a)
c.sync_maxlen = nla_get_u16(a);
a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
if (a) {
c.mcast_af = AF_INET;
c.mcast_group.ip = nla_get_in_addr(a);
if (!ipv4_is_multicast(c.mcast_group.ip))
return -EINVAL;
} else {
a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
if (a) {
#ifdef CONFIG_IP_VS_IPV6
int addr_type;
c.mcast_af = AF_INET6;
c.mcast_group.in6 = nla_get_in6_addr(a);
addr_type = ipv6_addr_type(&c.mcast_group.in6);
if (!(addr_type & IPV6_ADDR_MULTICAST))
return -EINVAL;
#else
return -EAFNOSUPPORT;
#endif
}
}
a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
if (a)
c.mcast_port = nla_get_u16(a);
a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
if (a)
c.mcast_ttl = nla_get_u8(a);
/* The synchronization protocol is incompatible with mixed family /* The synchronization protocol is incompatible with mixed family
* services * services
*/ */
if (net_ipvs(net)->mixed_address_family_dests > 0) if (ipvs->mixed_address_family_dests > 0)
return -EINVAL; return -EINVAL;
return start_sync_thread(net, rtnl_lock();
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), mutex_lock(&ipvs->sync_mutex);
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), ret = start_sync_thread(net, &c,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
return ret;
} }
static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
{ {
struct netns_ipvs *ipvs = net_ipvs(net);
int ret;
if (!attrs[IPVS_DAEMON_ATTR_STATE]) if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL; return -EINVAL;
return stop_sync_thread(net, mutex_lock(&ipvs->sync_mutex);
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); ret = stop_sync_thread(net,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
mutex_unlock(&ipvs->sync_mutex);
return ret;
} }
static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
...@@ -3389,7 +3469,7 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) ...@@ -3389,7 +3469,7 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{ {
int ret = 0, cmd; int ret = -EINVAL, cmd;
struct net *net; struct net *net;
struct netns_ipvs *ipvs; struct netns_ipvs *ipvs;
...@@ -3400,22 +3480,19 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) ...@@ -3400,22 +3480,19 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
mutex_lock(&ipvs->sync_mutex);
if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
info->attrs[IPVS_CMD_ATTR_DAEMON], info->attrs[IPVS_CMD_ATTR_DAEMON],
ip_vs_daemon_policy)) { ip_vs_daemon_policy))
ret = -EINVAL;
goto out; goto out;
}
if (cmd == IPVS_CMD_NEW_DAEMON) if (cmd == IPVS_CMD_NEW_DAEMON)
ret = ip_vs_genl_new_daemon(net, daemon_attrs); ret = ip_vs_genl_new_daemon(net, daemon_attrs);
else else
ret = ip_vs_genl_del_daemon(net, daemon_attrs); ret = ip_vs_genl_del_daemon(net, daemon_attrs);
out:
mutex_unlock(&ipvs->sync_mutex);
} }
out:
return ret; return ret;
} }
......
/*
* IPVS: Overflow-Connection Scheduling module
*
* Authors: Raducu Deaconu <rhadoo_io@yahoo.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Scheduler implements "overflow" load balancing according to the number of
* active connections: it keeps all connections on the node with the highest
* weight and overflows to the next node if the number of connections exceeds
* the node's weight.
* Note that this scheduler might not be suitable for UDP because it only uses
* active connections
*
*/
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <net/ip_vs.h>
/* OVF scheduling: return the eligible destination with the largest weight.
 *
 * A destination is eligible when it is not flagged IP_VS_DEST_F_OVERLOAD,
 * its weight is non-zero, and its active connection count does not exceed
 * its weight. On equal weights the destination seen first in the list wins.
 * Returns NULL (after reporting a scheduler error) when nothing is eligible.
 */
static struct ip_vs_dest *
ip_vs_ovf_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		   struct ip_vs_iphdr *iph)
{
	struct ip_vs_dest *cand;
	struct ip_vs_dest *best = NULL;
	int best_weight = 0;
	int weight;

	IP_VS_DBG(6, "ip_vs_ovf_schedule(): Scheduling...\n");

	list_for_each_entry_rcu(cand, &svc->destinations, n_list) {
		weight = atomic_read(&cand->weight);

		/* not a candidate: overloaded, over its weight, or disabled */
		if (cand->flags & IP_VS_DEST_F_OVERLOAD)
			continue;
		if (atomic_read(&cand->activeconns) > weight)
			continue;
		if (weight == 0)
			continue;

		/* keep the current pick unless this one is strictly heavier */
		if (best && weight <= best_weight)
			continue;

		best = cand;
		best_weight = weight;
	}

	if (!best) {
		ip_vs_scheduler_err(svc, "no destination available");
		return NULL;
	}

	IP_VS_DBG_BUF(6, "OVF: server %s:%u active %d w %d\n",
		      IP_VS_DBG_ADDR(best->af, &best->addr),
		      ntohs(best->port),
		      atomic_read(&best->activeconns),
		      atomic_read(&best->weight));

	return best;
}
/* Scheduler descriptor for "ovf"; handed to register_ip_vs_scheduler() */
static struct ip_vs_scheduler ip_vs_ovf_scheduler = {
	.name		= "ovf",
	.schedule	= ip_vs_ovf_schedule,
	.module		= THIS_MODULE,
	.refcnt		= ATOMIC_INIT(0),
	.n_list		= LIST_HEAD_INIT(ip_vs_ovf_scheduler.n_list),
};
/* Module init: register the OVF scheduler and pass the result back. */
static int __init ip_vs_ovf_init(void)
{
	int err;

	err = register_ip_vs_scheduler(&ip_vs_ovf_scheduler);
	return err;
}
/* Module exit: unregister the OVF scheduler, then call synchronize_rcu()
 * before returning.
 */
static void __exit ip_vs_ovf_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_ovf_scheduler);
/* NOTE(review): presumably lets RCU readers still running
 * ip_vs_ovf_schedule() finish before the module is unloaded - confirm.
 */
synchronize_rcu();
}
/* Hook the init and cleanup routines into module load/unload. */
module_init(ip_vs_ovf_init);
module_exit(ip_vs_ovf_cleanup);
MODULE_LICENSE("GPL");
This diff is collapsed.
...@@ -188,7 +188,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, ...@@ -188,7 +188,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
const struct nf_conn_synproxy *synproxy) const struct nf_conn_synproxy *synproxy)
{ {
unsigned int optoff, optend; unsigned int optoff, optend;
u32 *ptr, old; __be32 *ptr, old;
if (synproxy->tsoff == 0) if (synproxy->tsoff == 0)
return 1; return 1;
...@@ -216,12 +216,12 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, ...@@ -216,12 +216,12 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
if (op[0] == TCPOPT_TIMESTAMP && if (op[0] == TCPOPT_TIMESTAMP &&
op[1] == TCPOLEN_TIMESTAMP) { op[1] == TCPOLEN_TIMESTAMP) {
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
ptr = (u32 *)&op[2]; ptr = (__be32 *)&op[2];
old = *ptr; old = *ptr;
*ptr = htonl(ntohl(*ptr) - *ptr = htonl(ntohl(*ptr) -
synproxy->tsoff); synproxy->tsoff);
} else { } else {
ptr = (u32 *)&op[6]; ptr = (__be32 *)&op[6];
old = *ptr; old = *ptr;
*ptr = htonl(ntohl(*ptr) + *ptr = htonl(ntohl(*ptr) +
synproxy->tsoff); synproxy->tsoff);
......
...@@ -37,7 +37,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -37,7 +37,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE; return XT_CONTINUE;
} }
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_NF_DUP_IPV6)
static unsigned int static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{ {
...@@ -129,7 +129,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { ...@@ -129,7 +129,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.destroy = tee_tg_destroy, .destroy = tee_tg_destroy,
.me = THIS_MODULE, .me = THIS_MODULE,
}, },
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_NF_DUP_IPV6)
{ {
.name = "TEE", .name = "TEE",
.revision = 1, .revision = 1,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment