Commit 21f130a2 authored by David S. Miller's avatar David S. Miller

Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

parents bc5e8fdf c58ec932
...@@ -2211,6 +2211,15 @@ D: OV511 driver ...@@ -2211,6 +2211,15 @@ D: OV511 driver
S: (address available on request) S: (address available on request)
S: USA S: USA
N: Ian McDonald
E: iam4@cs.waikato.ac.nz
E: imcdnzl@gmail.com
W: http://wand.net.nz/~iam4
W: http://imcdnzl.blogspot.com
D: DCCP, CCID3
S: Hamilton
S: New Zealand
N: Patrick McHardy N: Patrick McHardy
E: kaber@trash.net E: kaber@trash.net
P: 1024D/12155E80 B128 7DE6 FF0A C2B2 48BE AB4C C9D4 964E 1215 5E80 P: 1024D/12155E80 B128 7DE6 FF0A C2B2 48BE AB4C C9D4 964E 1215 5E80
...@@ -2246,19 +2255,12 @@ S: D-90453 Nuernberg ...@@ -2246,19 +2255,12 @@ S: D-90453 Nuernberg
S: Germany S: Germany
N: Arnaldo Carvalho de Melo N: Arnaldo Carvalho de Melo
E: acme@conectiva.com.br E: acme@mandriva.com
E: acme@kernel.org E: acme@ghostprotocols.net
E: acme@gnu.org W: http://oops.ghostprotocols.net:81/blog/
W: http://bazar2.conectiva.com.br/~acme
W: http://advogato.org/person/acme
P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
D: wanrouter hacking D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
D: misc Makefile, Config.in, drivers and network stacks fixes S: Mandriva
D: IPX & LLC network stacks maintainer
D: Cyclom 2X synchronous card driver
D: wl3501 PCMCIA wireless card driver
D: i18n for minicom, net-tools, util-linux, fetchmail, etc
S: Conectiva S.A.
S: R. Tocantins, 89 - Cristo Rei S: R. Tocantins, 89 - Cristo Rei
S: 80050-430 - Curitiba - Paraná S: 80050-430 - Curitiba - Paraná
S: Brazil S: Brazil
......
...@@ -686,6 +686,13 @@ P: Guennadi Liakhovetski ...@@ -686,6 +686,13 @@ P: Guennadi Liakhovetski
M: g.liakhovetski@gmx.de M: g.liakhovetski@gmx.de
S: Maintained S: Maintained
DCCP PROTOCOL
P: Arnaldo Carvalho de Melo
M: acme@mandriva.com
L: dccp@vger.kernel.org
W: http://www.wlug.org.nz/DCCP
S: Maintained
DECnet NETWORK LAYER DECnet NETWORK LAYER
P: Patrick Caulfield P: Patrick Caulfield
M: patrick@tykepenguin.com M: patrick@tykepenguin.com
...@@ -2271,12 +2278,6 @@ M: R.E.Wolff@BitWizard.nl ...@@ -2271,12 +2278,6 @@ M: R.E.Wolff@BitWizard.nl
L: linux-kernel@vger.kernel.org ? L: linux-kernel@vger.kernel.org ?
S: Supported S: Supported
SPX NETWORK LAYER
P: Jay Schulist
M: jschlst@samba.org
L: netdev@vger.kernel.org
S: Supported
SRM (Alpha) environment access SRM (Alpha) environment access
P: Jan-Benedict Glaw P: Jan-Benedict Glaw
M: jbglaw@lug-owl.de M: jbglaw@lug-owl.de
......
...@@ -67,8 +67,8 @@ ...@@ -67,8 +67,8 @@
#define DRV_MODULE_NAME "tg3" #define DRV_MODULE_NAME "tg3"
#define PFX DRV_MODULE_NAME ": " #define PFX DRV_MODULE_NAME ": "
#define DRV_MODULE_VERSION "3.39" #define DRV_MODULE_VERSION "3.40"
#define DRV_MODULE_RELDATE "September 5, 2005" #define DRV_MODULE_RELDATE "September 15, 2005"
#define TG3_DEF_MAC_MODE 0 #define TG3_DEF_MAC_MODE 0
#define TG3_DEF_RX_MODE 0 #define TG3_DEF_RX_MODE 0
...@@ -3442,31 +3442,47 @@ static void tg3_tx_timeout(struct net_device *dev) ...@@ -3442,31 +3442,47 @@ static void tg3_tx_timeout(struct net_device *dev)
schedule_work(&tp->reset_task); schedule_work(&tp->reset_task);
} }
/*
 * Test for DMA buffers crossing any 4GB boundaries: 4G, 8G, etc.
 *
 * Only the low 32 bits of @mapping are examined.  Returns 1 when those
 * bits are above 0xffffdcc0 and adding @len plus 8 to them wraps around
 * 32 bits, 0 otherwise.
 */
static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
{
	const u32 low = (u32) mapping;

	/* Starts at or below the threshold: never reported as crossing. */
	if (low <= 0xffffdcc0)
		return 0;

	/* u32 arithmetic wraps, so a sum below the start means overflow. */
	return low + len + 8 < low;
}
static void tg3_set_txd(struct tg3 *, int, dma_addr_t, int, u32, u32); static void tg3_set_txd(struct tg3 *, int, dma_addr_t, int, u32, u32);
static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb, static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
u32 guilty_entry, int guilty_len, u32 last_plus_one, u32 *start,
u32 last_plus_one, u32 *start, u32 mss) u32 base_flags, u32 mss)
{ {
struct sk_buff *new_skb = skb_copy(skb, GFP_ATOMIC); struct sk_buff *new_skb = skb_copy(skb, GFP_ATOMIC);
dma_addr_t new_addr; dma_addr_t new_addr = 0;
u32 entry = *start; u32 entry = *start;
int i; int i, ret = 0;
if (!new_skb) { if (!new_skb) {
dev_kfree_skb(skb); ret = -1;
return -1; } else {
/* New SKB is guaranteed to be linear. */
entry = *start;
new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
PCI_DMA_TODEVICE);
/* Make sure new skb does not cross any 4G boundaries.
* Drop the packet if it does.
*/
if (tg3_4g_overflow_test(new_addr, new_skb->len)) {
ret = -1;
dev_kfree_skb(new_skb);
new_skb = NULL;
} else {
tg3_set_txd(tp, entry, new_addr, new_skb->len,
base_flags, 1 | (mss << 1));
*start = NEXT_TX(entry);
}
} }
/* New SKB is guaranteed to be linear. */
entry = *start;
new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
PCI_DMA_TODEVICE);
tg3_set_txd(tp, entry, new_addr, new_skb->len,
(skb->ip_summed == CHECKSUM_HW) ?
TXD_FLAG_TCPUDP_CSUM : 0, 1 | (mss << 1));
*start = NEXT_TX(entry);
/* Now clean up the sw ring entries. */ /* Now clean up the sw ring entries. */
i = 0; i = 0;
while (entry != last_plus_one) { while (entry != last_plus_one) {
...@@ -3491,7 +3507,7 @@ static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb, ...@@ -3491,7 +3507,7 @@ static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
dev_kfree_skb(skb); dev_kfree_skb(skb);
return 0; return ret;
} }
static void tg3_set_txd(struct tg3 *tp, int entry, static void tg3_set_txd(struct tg3 *tp, int entry,
...@@ -3517,19 +3533,10 @@ static void tg3_set_txd(struct tg3 *tp, int entry, ...@@ -3517,19 +3533,10 @@ static void tg3_set_txd(struct tg3 *tp, int entry,
txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT; txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
} }
/*
 * Returns nonzero when the low 32 bits of @mapping are above 0xffffdcc0
 * and adding @len + 8 to them wraps around 32 bits, i.e. the buffer
 * would run across a 4GB address boundary.
 */
static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
{
u32 base = (u32) mapping & 0xffffffff;
return ((base > 0xffffdcc0) &&
(base + len + 8 < base));
}
static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
{ {
struct tg3 *tp = netdev_priv(dev); struct tg3 *tp = netdev_priv(dev);
dma_addr_t mapping; dma_addr_t mapping;
unsigned int i;
u32 len, entry, base_flags, mss; u32 len, entry, base_flags, mss;
int would_hit_hwbug; int would_hit_hwbug;
...@@ -3624,7 +3631,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -3624,7 +3631,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
would_hit_hwbug = 0; would_hit_hwbug = 0;
if (tg3_4g_overflow_test(mapping, len)) if (tg3_4g_overflow_test(mapping, len))
would_hit_hwbug = entry + 1; would_hit_hwbug = 1;
tg3_set_txd(tp, entry, mapping, len, base_flags, tg3_set_txd(tp, entry, mapping, len, base_flags,
(skb_shinfo(skb)->nr_frags == 0) | (mss << 1)); (skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
...@@ -3648,12 +3655,8 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -3648,12 +3655,8 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
tp->tx_buffers[entry].skb = NULL; tp->tx_buffers[entry].skb = NULL;
pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping); pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
if (tg3_4g_overflow_test(mapping, len)) { if (tg3_4g_overflow_test(mapping, len))
/* Only one should match. */ would_hit_hwbug = 1;
if (would_hit_hwbug)
BUG();
would_hit_hwbug = entry + 1;
}
if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
tg3_set_txd(tp, entry, mapping, len, tg3_set_txd(tp, entry, mapping, len,
...@@ -3669,34 +3672,15 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -3669,34 +3672,15 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (would_hit_hwbug) { if (would_hit_hwbug) {
u32 last_plus_one = entry; u32 last_plus_one = entry;
u32 start; u32 start;
unsigned int len = 0;
would_hit_hwbug -= 1;
entry = entry - 1 - skb_shinfo(skb)->nr_frags;
entry &= (TG3_TX_RING_SIZE - 1);
start = entry;
i = 0;
while (entry != last_plus_one) {
if (i == 0)
len = skb_headlen(skb);
else
len = skb_shinfo(skb)->frags[i-1].size;
if (entry == would_hit_hwbug) start = entry - 1 - skb_shinfo(skb)->nr_frags;
break; start &= (TG3_TX_RING_SIZE - 1);
i++;
entry = NEXT_TX(entry);
}
/* If the workaround fails due to memory/mapping /* If the workaround fails due to memory/mapping
* failure, silently drop this packet. * failure, silently drop this packet.
*/ */
if (tigon3_4gb_hwbug_workaround(tp, skb, if (tigon3_4gb_hwbug_workaround(tp, skb, last_plus_one,
entry, len, &start, base_flags, mss))
last_plus_one,
&start, mss))
goto out_unlock; goto out_unlock;
entry = start; entry = start;
...@@ -9271,6 +9255,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) ...@@ -9271,6 +9255,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
static struct pci_device_id write_reorder_chipsets[] = { static struct pci_device_id write_reorder_chipsets[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, { PCI_DEVICE(PCI_VENDOR_ID_AMD,
PCI_DEVICE_ID_AMD_FE_GATE_700C) }, PCI_DEVICE_ID_AMD_FE_GATE_700C) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD,
PCI_DEVICE_ID_AMD_K8_NB) },
{ }, { },
}; };
u32 misc_ctrl_reg; u32 misc_ctrl_reg;
...@@ -9285,7 +9271,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) ...@@ -9285,7 +9271,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
tp->tg3_flags2 |= TG3_FLG2_SUN_570X; tp->tg3_flags2 |= TG3_FLG2_SUN_570X;
#endif #endif
/* If we have an AMD 762 chipset, write /* If we have an AMD 762 or K8 chipset, write
* reordering to the mailbox registers done by the host * reordering to the mailbox registers done by the host
* controller can cause major troubles. We read back from * controller can cause major troubles. We read back from
* every mailbox register write to force the writes to be * every mailbox register write to force the writes to be
...@@ -9532,7 +9518,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) ...@@ -9532,7 +9518,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
tp->write32_rx_mbox = tg3_write_indirect_mbox; tp->write32_rx_mbox = tg3_write_indirect_mbox;
iounmap(tp->regs); iounmap(tp->regs);
tp->regs = 0; tp->regs = NULL;
pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd); pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd);
pci_cmd &= ~PCI_COMMAND_MEMORY; pci_cmd &= ~PCI_COMMAND_MEMORY;
...@@ -10680,7 +10666,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, ...@@ -10680,7 +10666,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
err_out_iounmap: err_out_iounmap:
if (tp->regs) { if (tp->regs) {
iounmap(tp->regs); iounmap(tp->regs);
tp->regs = 0; tp->regs = NULL;
} }
err_out_free_dev: err_out_free_dev:
...@@ -10705,7 +10691,7 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev) ...@@ -10705,7 +10691,7 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev)
unregister_netdev(dev); unregister_netdev(dev);
if (tp->regs) { if (tp->regs) {
iounmap(tp->regs); iounmap(tp->regs);
tp->regs = 0; tp->regs = NULL;
} }
free_netdev(dev); free_netdev(dev);
pci_release_regions(pdev); pci_release_regions(pdev);
......
...@@ -4,16 +4,6 @@ ...@@ -4,16 +4,6 @@
#include <linux/types.h> #include <linux/types.h>
#include <asm/byteorder.h> #include <asm/byteorder.h>
/* Structure describing an Internet (DCCP) socket address. */
struct sockaddr_dccp {
__u16 sdccp_family; /* Address family */
__u16 sdccp_port; /* Port number */
__u32 sdccp_addr; /* Internet address */
__u32 sdccp_service; /* Service */
/* Pad to size of `struct sockaddr': 16 bytes . */
__u32 sdccp_pad;
};
/** /**
* struct dccp_hdr - generic part of DCCP packet header * struct dccp_hdr - generic part of DCCP packet header
* *
...@@ -188,6 +178,9 @@ enum { ...@@ -188,6 +178,9 @@ enum {
/* DCCP socket options */ /* DCCP socket options */
#define DCCP_SOCKOPT_PACKET_SIZE 1 #define DCCP_SOCKOPT_PACKET_SIZE 1
#define DCCP_SOCKOPT_SERVICE 2
#define DCCP_SERVICE_LIST_MAX_LEN 32
#ifdef __KERNEL__ #ifdef __KERNEL__
...@@ -382,6 +375,25 @@ enum dccp_role { ...@@ -382,6 +375,25 @@ enum dccp_role {
DCCP_ROLE_SERVER, DCCP_ROLE_SERVER,
}; };
/*
 * Variable-length list of service codes: a count followed by that many
 * __u32 entries.
 */
struct dccp_service_list {
/* Number of entries in dccpsl_list. */
__u32 dccpsl_nr;
/* Trailing array holding the dccpsl_nr service codes. */
__u32 dccpsl_list[0];
};
/* All-ones __u32 passed through htonl(); compared against to detect an unset service code. */
#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
/*
 * dccp_list_has_service - search a service list for a service code
 * @sl: list to search, may be NULL
 * @service: service code to look for
 *
 * Returns 1 when @service equals one of the sl->dccpsl_nr entries,
 * 0 when it does not or when @sl is NULL.
 */
static inline int dccp_list_has_service(const struct dccp_service_list *sl,
					const u32 service)
{
	if (likely(sl != NULL)) {
		u32 remaining;

		/* Walk from the last entry down to the first. */
		for (remaining = sl->dccpsl_nr; remaining != 0; remaining--) {
			if (sl->dccpsl_list[remaining - 1] == service)
				return 1;
		}
	}

	return 0;
}
/** /**
* struct dccp_sock - DCCP socket state * struct dccp_sock - DCCP socket state
* *
...@@ -417,7 +429,8 @@ struct dccp_sock { ...@@ -417,7 +429,8 @@ struct dccp_sock {
__u64 dccps_gss; __u64 dccps_gss;
__u64 dccps_gsr; __u64 dccps_gsr;
__u64 dccps_gar; __u64 dccps_gar;
unsigned long dccps_service; __u32 dccps_service;
struct dccp_service_list *dccps_service_list;
struct timeval dccps_timestamp_time; struct timeval dccps_timestamp_time;
__u32 dccps_timestamp_echo; __u32 dccps_timestamp_echo;
__u32 dccps_packet_size; __u32 dccps_packet_size;
...@@ -443,6 +456,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) ...@@ -443,6 +456,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk)
return (struct dccp_sock *)sk; return (struct dccp_sock *)sk;
} }
static inline int dccp_service_not_initialized(const struct sock *sk)
{
return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE;
}
static inline const char *dccp_role(const struct sock *sk) static inline const char *dccp_role(const struct sock *sk)
{ {
switch (dccp_sk(sk)->dccps_role) { switch (dccp_sk(sk)->dccps_role) {
......
...@@ -491,6 +491,7 @@ ...@@ -491,6 +491,7 @@
#define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060 #define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060
#define PCI_VENDOR_ID_AMD 0x1022 #define PCI_VENDOR_ID_AMD 0x1022
#define PCI_DEVICE_ID_AMD_K8_NB 0x1100
#define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
#define PCI_DEVICE_ID_AMD_SCSI 0x2020 #define PCI_DEVICE_ID_AMD_SCSI 0x2020
......
...@@ -140,6 +140,7 @@ config BRIDGE_NETFILTER ...@@ -140,6 +140,7 @@ config BRIDGE_NETFILTER
If unsure, say N. If unsure, say N.
source "net/netfilter/Kconfig"
source "net/ipv4/netfilter/Kconfig" source "net/ipv4/netfilter/Kconfig"
source "net/ipv6/netfilter/Kconfig" source "net/ipv6/netfilter/Kconfig"
source "net/decnet/netfilter/Kconfig" source "net/decnet/netfilter/Kconfig"
...@@ -206,8 +207,6 @@ config NET_PKTGEN ...@@ -206,8 +207,6 @@ config NET_PKTGEN
To compile this code as a module, choose M here: the To compile this code as a module, choose M here: the
module will be called pktgen. module will be called pktgen.
source "net/netfilter/Kconfig"
endmenu endmenu
endmenu endmenu
......
...@@ -258,13 +258,12 @@ extern int dccp_v4_send_reset(struct sock *sk, ...@@ -258,13 +258,12 @@ extern int dccp_v4_send_reset(struct sock *sk,
extern void dccp_send_close(struct sock *sk, const int active); extern void dccp_send_close(struct sock *sk, const int active);
struct dccp_skb_cb { struct dccp_skb_cb {
__u8 dccpd_type; __u8 dccpd_type:4;
__u8 dccpd_reset_code; __u8 dccpd_ccval:4;
__u8 dccpd_service; __u8 dccpd_reset_code;
__u8 dccpd_ccval; __u16 dccpd_opt_len;
__u64 dccpd_seq; __u64 dccpd_seq;
__u64 dccpd_ack_seq; __u64 dccpd_ack_seq;
int dccpd_opt_len;
}; };
#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
......
...@@ -384,9 +384,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, ...@@ -384,9 +384,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
} }
out_invalid_packet: out_invalid_packet:
return 1; /* dccp_v4_do_rcv will send a reset, but... /* dccp_v4_do_rcv will send a reset */
FIXME: the reset code should be DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
DCCP_RESET_CODE_PACKET_ERROR */ return 1;
} }
static int dccp_rcv_respond_partopen_state_process(struct sock *sk, static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
...@@ -433,6 +433,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -433,6 +433,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_hdr *dh, unsigned len) struct dccp_hdr *dh, unsigned len)
{ {
struct dccp_sock *dp = dccp_sk(sk); struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
const int old_state = sk->sk_state; const int old_state = sk->sk_state;
int queued = 0; int queued = 0;
...@@ -473,7 +474,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -473,7 +474,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dh->dccph_type == DCCP_PKT_RESET) if (dh->dccph_type == DCCP_PKT_RESET)
goto discard; goto discard;
/* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ /* Caller (dccp_v4_do_rcv) will send Reset */
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
return 1; return 1;
} }
...@@ -487,8 +489,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -487,8 +489,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dccp_parse_options(sk, skb)) if (dccp_parse_options(sk, skb))
goto discard; goto discard;
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb); dccp_event_ack_recv(sk, skb);
ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
...@@ -500,7 +501,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -500,7 +501,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
*/ */
if (dp->dccps_options.dccpo_send_ack_vector) { if (dp->dccps_options.dccpo_send_ack_vector) {
if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
DCCP_SKB_CB(skb)->dccpd_seq, dcb->dccpd_seq,
DCCP_ACKPKTS_STATE_RECEIVED)) DCCP_ACKPKTS_STATE_RECEIVED))
goto discard; goto discard;
/* /*
...@@ -551,8 +552,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -551,8 +552,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
dh->dccph_type == DCCP_PKT_REQUEST) || dh->dccph_type == DCCP_PKT_REQUEST) ||
(sk->sk_state == DCCP_RESPOND && (sk->sk_state == DCCP_RESPOND &&
dh->dccph_type == DCCP_PKT_DATA)) { dh->dccph_type == DCCP_PKT_DATA)) {
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
DCCP_PKT_SYNC);
goto discard; goto discard;
} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
dccp_rcv_closereq(sk, skb); dccp_rcv_closereq(sk, skb);
...@@ -563,13 +563,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -563,13 +563,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
} }
if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
DCCP_PKT_SYNCACK);
goto discard; goto discard;
} }
switch (sk->sk_state) { switch (sk->sk_state) {
case DCCP_CLOSED: case DCCP_CLOSED:
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
return 1; return 1;
case DCCP_REQUESTING: case DCCP_REQUESTING:
......
...@@ -246,6 +246,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -246,6 +246,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
dp->dccps_role = DCCP_ROLE_CLIENT; dp->dccps_role = DCCP_ROLE_CLIENT;
if (dccp_service_not_initialized(sk))
return -EPROTO;
if (addr_len < sizeof(struct sockaddr_in)) if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL; return -EINVAL;
...@@ -661,6 +664,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, ...@@ -661,6 +664,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
dccp_hdr(skb)->dccph_sport); dccp_hdr(skb)->dccph_sport);
} }
/*
 * dccp_bad_service_code - check a service code against a socket
 * @sk: socket whose configured service codes are consulted
 * @service: service code to check
 *
 * Returns 0 when @service equals the socket's dccps_service or is found
 * in its dccps_service_list, nonzero otherwise.
 */
static inline int dccp_bad_service_code(const struct sock *sk,
					const __u32 service)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	/* The list is only searched when the primary code does not match. */
	const int acceptable =
		dp->dccps_service == service ||
		dccp_list_has_service(dp->dccps_service_list, service);

	return !acceptable;
}
int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{ {
struct inet_request_sock *ireq; struct inet_request_sock *ireq;
...@@ -669,13 +682,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ...@@ -669,13 +682,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
struct dccp_request_sock *dreq; struct dccp_request_sock *dreq;
const __u32 saddr = skb->nh.iph->saddr; const __u32 saddr = skb->nh.iph->saddr;
const __u32 daddr = skb->nh.iph->daddr; const __u32 daddr = skb->nh.iph->daddr;
const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
struct dst_entry *dst = NULL; struct dst_entry *dst = NULL;
/* Never answer to DCCP_PKT_REQUESTs sent to broadcast or multicast */ /* Never answer to DCCP_PKT_REQUESTs sent to broadcast or multicast */
if (((struct rtable *)skb->dst)->rt_flags & if (((struct rtable *)skb->dst)->rt_flags &
(RTCF_BROADCAST | RTCF_MULTICAST)) (RTCF_BROADCAST | RTCF_MULTICAST)) {
reset_code = DCCP_RESET_CODE_NO_CONNECTION;
goto drop; goto drop;
}
if (dccp_bad_service_code(sk, service)) {
reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
goto drop;
}
/* /*
* TW buckets are converted to open requests without * TW buckets are converted to open requests without
* limitations, they conserve resources and peer is * limitations, they conserve resources and peer is
...@@ -718,9 +740,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ...@@ -718,9 +740,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* dccp_create_openreq_child. * dccp_create_openreq_child.
*/ */
dreq = dccp_rsk(req); dreq = dccp_rsk(req);
dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; dreq->dreq_isr = dcb->dccpd_seq;
dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; dreq->dreq_service = service;
if (dccp_v4_send_response(sk, req, dst)) if (dccp_v4_send_response(sk, req, dst))
goto drop_and_free; goto drop_and_free;
...@@ -735,6 +757,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ...@@ -735,6 +757,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
__reqsk_free(req); __reqsk_free(req);
drop: drop:
DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
dcb->dccpd_reset_code = reset_code;
return -1; return -1;
} }
...@@ -1005,7 +1028,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -1005,7 +1028,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0; return 0;
reset: reset:
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
dccp_v4_ctl_send_reset(skb); dccp_v4_ctl_send_reset(skb);
discard: discard:
kfree_skb(skb); kfree_skb(skb);
...@@ -1280,6 +1302,7 @@ static int dccp_v4_init_sock(struct sock *sk) ...@@ -1280,6 +1302,7 @@ static int dccp_v4_init_sock(struct sock *sk)
sk->sk_write_space = dccp_write_space; sk->sk_write_space = dccp_write_space;
dp->dccps_mss_cache = 536; dp->dccps_mss_cache = 536;
dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_role = DCCP_ROLE_UNDEFINED;
dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
return 0; return 0;
} }
...@@ -1301,6 +1324,11 @@ static int dccp_v4_destroy_sock(struct sock *sk) ...@@ -1301,6 +1324,11 @@ static int dccp_v4_destroy_sock(struct sock *sk)
if (inet_csk(sk)->icsk_bind_hash != NULL) if (inet_csk(sk)->icsk_bind_hash != NULL)
inet_put_port(&dccp_hashinfo, sk); inet_put_port(&dccp_hashinfo, sk);
if (dp->dccps_service_list != NULL) {
kfree(dp->dccps_service_list);
dp->dccps_service_list = NULL;
}
ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
......
...@@ -93,9 +93,11 @@ struct sock *dccp_create_openreq_child(struct sock *sk, ...@@ -93,9 +93,11 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
struct inet_connection_sock *newicsk = inet_csk(sk); struct inet_connection_sock *newicsk = inet_csk(sk);
struct dccp_sock *newdp = dccp_sk(newsk); struct dccp_sock *newdp = dccp_sk(newsk);
newdp->dccps_role = DCCP_ROLE_SERVER;
newdp->dccps_hc_rx_ackpkts = NULL; newdp->dccps_hc_rx_ackpkts = NULL;
newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_service_list = NULL;
newicsk->icsk_rto = DCCP_TIMEOUT_INIT; newdp->dccps_service = dreq->dreq_service;
newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
do_gettimeofday(&newdp->dccps_epoch); do_gettimeofday(&newdp->dccps_epoch);
if (newdp->dccps_options.dccpo_send_ack_vector) { if (newdp->dccps_options.dccpo_send_ack_vector) {
......
...@@ -85,7 +85,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) ...@@ -85,7 +85,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
switch (dcb->dccpd_type) { switch (dcb->dccpd_type) {
case DCCP_PKT_REQUEST: case DCCP_PKT_REQUEST:
dccp_hdr_request(skb)->dccph_req_service = dccp_hdr_request(skb)->dccph_req_service =
dcb->dccpd_service; dp->dccps_service;
break; break;
case DCCP_PKT_RESET: case DCCP_PKT_RESET:
dccp_hdr_reset(skb)->dccph_reset_code = dccp_hdr_reset(skb)->dccph_reset_code =
...@@ -270,6 +270,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, ...@@ -270,6 +270,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
struct request_sock *req) struct request_sock *req)
{ {
struct dccp_hdr *dh; struct dccp_hdr *dh;
struct dccp_request_sock *dreq;
const int dccp_header_size = sizeof(struct dccp_hdr) + const int dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_response); sizeof(struct dccp_hdr_response);
...@@ -285,8 +286,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, ...@@ -285,8 +286,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
skb->dst = dst_clone(dst); skb->dst = dst_clone(dst);
skb->csum = 0; skb->csum = 0;
dreq = dccp_rsk(req);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
dccp_insert_options(sk, skb); dccp_insert_options(sk, skb);
skb->h.raw = skb_push(skb, dccp_header_size); skb->h.raw = skb_push(skb, dccp_header_size);
...@@ -300,8 +302,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, ...@@ -300,8 +302,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_type = DCCP_PKT_RESPONSE;
dh->dccph_x = 1; dh->dccph_x = 1;
dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); dccp_hdr_set_seq(dh, dreq->dreq_iss);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
inet_rsk(req)->rmt_addr); inet_rsk(req)->rmt_addr);
...@@ -397,9 +400,6 @@ int dccp_connect(struct sock *sk) ...@@ -397,9 +400,6 @@ int dccp_connect(struct sock *sk)
skb_reserve(skb, MAX_DCCP_HEADER); skb_reserve(skb, MAX_DCCP_HEADER);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
/* FIXME: set service to something meaningful, coming
* from userspace*/
DCCP_SKB_CB(skb)->dccpd_service = 0;
skb->csum = 0; skb->csum = 0;
skb_set_owner_w(skb, sk); skb_set_owner_w(skb, sk);
......
...@@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name); ...@@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
static inline int dccp_listen_start(struct sock *sk) static inline int dccp_listen_start(struct sock *sk)
{ {
dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_LISTEN;
/*
* Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
* before calling listen()
*/
if (dccp_service_not_initialized(sk))
return -EPROTO;
return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
} }
...@@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) ...@@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return -ENOIOCTLCMD; return -ENOIOCTLCMD;
} }
/*
 * dccp_setsockopt_service - set the service code(s) of a socket
 * @sk: socket being configured
 * @service: primary service code, stored in dccps_service
 * @optval: user buffer; the u32s following the first one become the
 *          socket's service list
 * @optlen: length of @optval in bytes
 *
 * Any previously installed service list is freed and replaced (by NULL
 * when @optlen carries no more than the primary code).
 *
 * Returns 0 on success, -EINVAL when @service is
 * DCCP_SERVICE_INVALID_VALUE or @optlen exceeds
 * DCCP_SERVICE_LIST_MAX_LEN u32s, -ENOMEM when the list cannot be
 * allocated, and -EFAULT when the user copy fails or the list contains
 * DCCP_SERVICE_INVALID_VALUE.
 */
static int dccp_setsockopt_service(struct sock *sk, const u32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		/*
		 * optlen bytes cover the dccpsl_nr header (same size as the
		 * leading service code) plus the entries that follow it.
		 */
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* Every u32 after the primary service code is a list entry. */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		/*
		 * NOTE(review): an invalid code inside the list is reported
		 * as -EFAULT rather than -EINVAL; kept as-is because the
		 * value is visible to userspace.
		 */
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* kfree(NULL) is a no-op, so no guard is needed here. */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
int dccp_setsockopt(struct sock *sk, int level, int optname, int dccp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen) char __user *optval, int optlen)
{ {
...@@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, ...@@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
if (get_user(val, (int __user *)optval)) if (get_user(val, (int __user *)optval))
return -EFAULT; return -EFAULT;
lock_sock(sk); if (optname == DCCP_SOCKOPT_SERVICE)
return dccp_setsockopt_service(sk, val, optval, optlen);
lock_sock(sk);
dp = dccp_sk(sk); dp = dccp_sk(sk);
err = 0; err = 0;
...@@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, ...@@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
return err; return err;
} }
/*
 * dccp_getsockopt_service - copy a socket's service code(s) to userspace
 * @sk: socket being queried
 * @len: size of the user buffer in bytes
 * @optval: user buffer; receives dccps_service followed by the entries
 *          of the service list, if one is installed
 * @optlen: user location that receives the number of bytes written
 *
 * Returns 0 on success, -ENOENT when no service code has been set,
 * -EINVAL when @len is too small for the result, and -EFAULT when a
 * copy to userspace fails.
 */
static int dccp_getsockopt_service(struct sock *sk, int len,
				   u32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int list_bytes = 0;
	int needed = sizeof(u32);
	int err;

	lock_sock(sk);

	if (dccp_service_not_initialized(sk)) {
		err = -ENOENT;
		goto out;
	}

	sl = dp->dccps_service_list;
	if (sl != NULL) {
		list_bytes = sl->dccpsl_nr * sizeof(u32);
		needed += list_bytes;
	}

	if (needed > len) {
		err = -EINVAL;
		goto out;
	}

	/* Length first, then the primary code, then the optional list. */
	if (put_user(needed, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL &&
	     copy_to_user(optval + 1, sl->dccpsl_list, list_bytes)))
		err = -EFAULT;
	else
		err = 0;
out:
	release_sock(sk);
	return err;
}
int dccp_getsockopt(struct sock *sk, int level, int optname, int dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen) char __user *optval, int __user *optlen)
{ {
...@@ -248,6 +325,10 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, ...@@ -248,6 +325,10 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen)) if (get_user(len, optlen))
return -EFAULT; return -EFAULT;
if (optname == DCCP_SOCKOPT_SERVICE)
return dccp_getsockopt_service(sk, len,
(u32 __user *)optval, optlen);
len = min_t(unsigned int, len, sizeof(int)); len = min_t(unsigned int, len, sizeof(int));
if (len < 0) if (len < 0)
return -EINVAL; return -EINVAL;
......
...@@ -8,6 +8,7 @@ menu "IP: Netfilter Configuration" ...@@ -8,6 +8,7 @@ menu "IP: Netfilter Configuration"
# connection tracking, helpers and protocols # connection tracking, helpers and protocols
config IP_NF_CONNTRACK config IP_NF_CONNTRACK
tristate "Connection tracking (required for masq/NAT)" tristate "Connection tracking (required for masq/NAT)"
select NETFILTER_NETLINK if IP_NF_CONNTRACK_NETLINK!=n
---help--- ---help---
Connection tracking keeps a record of what packets have passed Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related through your machine, in order to figure out how they are related
...@@ -51,6 +52,15 @@ config IP_NF_CONNTRACK_EVENTS ...@@ -51,6 +52,15 @@ config IP_NF_CONNTRACK_EVENTS
IF unsure, say `N'. IF unsure, say `N'.
config IP_NF_CONNTRACK_NETLINK
tristate 'Connection tracking netlink interface'
depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
default IP_NF_CONNTRACK if NETFILTER_NETLINK=y
default m if NETFILTER_NETLINK=m
help
This option enables support for a netlink-based userspace interface
config IP_NF_CT_PROTO_SCTP config IP_NF_CT_PROTO_SCTP
tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
depends on IP_NF_CONNTRACK && EXPERIMENTAL depends on IP_NF_CONNTRACK && EXPERIMENTAL
...@@ -774,11 +784,5 @@ config IP_NF_ARP_MANGLE ...@@ -774,11 +784,5 @@ config IP_NF_ARP_MANGLE
Allows altering the ARP packet payload: source and destination Allows altering the ARP packet payload: source and destination
hardware and network addresses. hardware and network addresses.
config IP_NF_CONNTRACK_NETLINK
tristate 'Connection tracking netlink interface'
depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
help
This option enables support for a netlink-based userspace interface
endmenu endmenu
...@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, ...@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
if (del_timer(&ct->timeout)) { if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies; ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout); add_timer(&ct->timeout);
ip_conntrack_event_cache(IPCT_REFRESH, skb); /* FIXME: We lose some REFRESH events if this function
* is called without an skb. I'll fix this later -HW */
if (skb)
ip_conntrack_event_cache(IPCT_REFRESH, skb);
} }
ct_add_counters(ct, ctinfo, skb); ct_add_counters(ct, ctinfo, skb);
write_unlock_bh(&ip_conntrack_lock); write_unlock_bh(&ip_conntrack_lock);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/config.h> #include <linux/config.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/jhash.h> #include <linux/jhash.h>
#include <linux/bitops.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/tcp.h> #include <linux/tcp.h>
...@@ -30,7 +31,7 @@ ...@@ -30,7 +31,7 @@
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
#include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack.h>
#define CLUSTERIP_VERSION "0.7" #define CLUSTERIP_VERSION "0.8"
#define DEBUG_CLUSTERIP #define DEBUG_CLUSTERIP
...@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP"); ...@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP");
struct clusterip_config { struct clusterip_config {
struct list_head list; /* list of all configs */ struct list_head list; /* list of all configs */
atomic_t refcount; /* reference count */ atomic_t refcount; /* reference count */
atomic_t entries; /* number of entries/rules
* referencing us */
u_int32_t clusterip; /* the IP address */ u_int32_t clusterip; /* the IP address */
u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
struct net_device *dev; /* device */ struct net_device *dev; /* device */
u_int16_t num_total_nodes; /* total number of nodes */ u_int16_t num_total_nodes; /* total number of nodes */
u_int16_t num_local_nodes; /* number of local nodes */ unsigned long local_nodes; /* node number array */
u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
struct proc_dir_entry *pde; /* proc dir entry */ struct proc_dir_entry *pde; /* proc dir entry */
...@@ -66,8 +68,7 @@ struct clusterip_config { ...@@ -66,8 +68,7 @@ struct clusterip_config {
static LIST_HEAD(clusterip_configs); static LIST_HEAD(clusterip_configs);
/* clusterip_lock protects the clusterip_configs list _AND_ the configurable /* clusterip_lock protects the clusterip_configs list */
* data within all structurses (num_local_nodes, local_nodes[]) */
static DEFINE_RWLOCK(clusterip_lock); static DEFINE_RWLOCK(clusterip_lock);
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
...@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir; ...@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir;
#endif #endif
static inline void static inline void
clusterip_config_get(struct clusterip_config *c) { clusterip_config_get(struct clusterip_config *c)
{
atomic_inc(&c->refcount); atomic_inc(&c->refcount);
} }
static inline void static inline void
clusterip_config_put(struct clusterip_config *c) { clusterip_config_put(struct clusterip_config *c)
if (atomic_dec_and_test(&c->refcount)) { {
if (atomic_dec_and_test(&c->refcount))
kfree(c);
}
/*
 * Register one more entry (rule) as a user of config @c by bumping its
 * 'entries' counter.
 */
static inline void clusterip_config_entry_get(struct clusterip_config *c)
{
	atomic_inc(&c->entries);
}
/* decrease the count of entries using/referencing this config. If last
* entry(rule) is removed, remove the config from lists, but don't free it
* yet, since proc-files could still be holding references */
static inline void
clusterip_config_entry_put(struct clusterip_config *c)
{
if (atomic_dec_and_test(&c->entries)) {
write_lock_bh(&clusterip_lock); write_lock_bh(&clusterip_lock);
list_del(&c->list); list_del(&c->list);
write_unlock_bh(&clusterip_lock); write_unlock_bh(&clusterip_lock);
dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
dev_put(c->dev); dev_put(c->dev);
kfree(c);
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
remove_proc_entry(c->pde->name, c->pde->parent);
#endif
} }
} }
static struct clusterip_config * static struct clusterip_config *
__clusterip_config_find(u_int32_t clusterip) __clusterip_config_find(u_int32_t clusterip)
{ {
...@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip) ...@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip)
} }
static inline struct clusterip_config * static inline struct clusterip_config *
clusterip_config_find_get(u_int32_t clusterip) clusterip_config_find_get(u_int32_t clusterip, int entry)
{ {
struct clusterip_config *c; struct clusterip_config *c;
...@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip) ...@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip)
return NULL; return NULL;
} }
atomic_inc(&c->refcount); atomic_inc(&c->refcount);
if (entry)
atomic_inc(&c->entries);
read_unlock_bh(&clusterip_lock); read_unlock_bh(&clusterip_lock);
return c; return c;
} }
static void
clusterip_config_init_nodelist(struct clusterip_config *c,
const struct ipt_clusterip_tgt_info *i)
{
int n;
for (n = 0; n < i->num_local_nodes; n++) {
set_bit(i->local_nodes[n] - 1, &c->local_nodes);
}
}
static struct clusterip_config * static struct clusterip_config *
clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
struct net_device *dev) struct net_device *dev)
...@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, ...@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
c->clusterip = ip; c->clusterip = ip;
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
c->num_total_nodes = i->num_total_nodes; c->num_total_nodes = i->num_total_nodes;
c->num_local_nodes = i->num_local_nodes; clusterip_config_init_nodelist(c, i);
memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
c->hash_mode = i->hash_mode; c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval; c->hash_initval = i->hash_initval;
atomic_set(&c->refcount, 1); atomic_set(&c->refcount, 1);
atomic_set(&c->entries, 1);
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
/* create proc dir entry */ /* create proc dir entry */
...@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, ...@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
static int static int
clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
{ {
int i;
write_lock_bh(&clusterip_lock);
if (c->num_local_nodes >= CLUSTERIP_MAX_NODES if (nodenum == 0 ||
|| nodenum > CLUSTERIP_MAX_NODES) { nodenum > c->num_total_nodes)
write_unlock_bh(&clusterip_lock);
return 1; return 1;
}
/* check if we alrady have this number in our array */
for (i = 0; i < c->num_local_nodes; i++) {
if (c->local_nodes[i] == nodenum) {
write_unlock_bh(&clusterip_lock);
return 1;
}
}
c->local_nodes[c->num_local_nodes++] = nodenum; /* check if we already have this number in our bitfield */
if (test_and_set_bit(nodenum - 1, &c->local_nodes))
return 1;
write_unlock_bh(&clusterip_lock);
return 0; return 0;
} }
static int static int
clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
{ {
int i; if (nodenum == 0 ||
nodenum > c->num_total_nodes)
write_lock_bh(&clusterip_lock);
if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
write_unlock_bh(&clusterip_lock);
return 1; return 1;
}
for (i = 0; i < c->num_local_nodes; i++) { if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
if (c->local_nodes[i] == nodenum) { return 0;
int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
c->num_local_nodes--;
write_unlock_bh(&clusterip_lock);
return 0;
}
}
write_unlock_bh(&clusterip_lock);
return 1; return 1;
} }
...@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) ...@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
static inline int static inline int
clusterip_responsible(struct clusterip_config *config, u_int32_t hash) clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
{ {
int i; return test_bit(hash - 1, &config->local_nodes);
read_lock_bh(&clusterip_lock);
if (config->num_local_nodes == 0) {
read_unlock_bh(&clusterip_lock);
return 0;
}
for (i = 0; i < config->num_local_nodes; i++) {
if (config->local_nodes[i] == hash) {
read_unlock_bh(&clusterip_lock);
return 1;
}
}
read_unlock_bh(&clusterip_lock);
return 0;
} }
/*********************************************************************** /***********************************************************************
...@@ -415,8 +411,26 @@ checkentry(const char *tablename, ...@@ -415,8 +411,26 @@ checkentry(const char *tablename,
/* FIXME: further sanity checks */ /* FIXME: further sanity checks */
config = clusterip_config_find_get(e->ip.dst.s_addr); config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
if (!config) { if (config) {
if (cipinfo->config != NULL) {
/* Case A: This is an entry that gets reloaded, since
* it still has a cipinfo->config pointer. Simply
* increase the entry refcount and return */
if (cipinfo->config != config) {
printk(KERN_ERR "CLUSTERIP: Reloaded entry "
"has invalid config pointer!\n");
return 0;
}
clusterip_config_entry_get(cipinfo->config);
} else {
/* Case B: This is a new rule referring to an existing
* clusterip config. */
cipinfo->config = config;
clusterip_config_entry_get(cipinfo->config);
}
} else {
/* Case C: This is a completely new clusterip config */
if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
return 0; return 0;
...@@ -443,10 +457,9 @@ checkentry(const char *tablename, ...@@ -443,10 +457,9 @@ checkentry(const char *tablename,
} }
dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
} }
cipinfo->config = config;
} }
cipinfo->config = config;
return 1; return 1;
} }
...@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize) ...@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize)
{ {
struct ipt_clusterip_tgt_info *cipinfo = matchinfo; struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
/* we first remove the proc entry and then drop the reference /* if no more entries are referencing the config, remove it
* count. In case anyone still accesses the file, the open/close * from the list and destroy the proc entry */
* functions are also incrementing the refcount on their own */ clusterip_config_entry_put(cipinfo->config);
#ifdef CONFIG_PROC_FS
remove_proc_entry(cipinfo->config->pde->name,
cipinfo->config->pde->parent);
#endif
clusterip_config_put(cipinfo->config); clusterip_config_put(cipinfo->config);
} }
...@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook, ...@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook,
/* if there is no clusterip configuration for the arp reply's /* if there is no clusterip configuration for the arp reply's
* source ip, we don't want to mangle it */ * source ip, we don't want to mangle it */
c = clusterip_config_find_get(payload->src_ip); c = clusterip_config_find_get(payload->src_ip, 0);
if (!c) if (!c)
return NF_ACCEPT; return NF_ACCEPT;
...@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = { ...@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = {
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
/*
 * Iterator state for the per-config proc seq_file.  It is allocated in
 * clusterip_seq_start() from a snapshot of the config's local_nodes
 * bitmask and advanced by clusterip_seq_next(); clusterip_seq_show()
 * prints one node number per step.
 */
struct clusterip_seq_position {
unsigned int pos; /* index of the current element (0-based) */
unsigned int weight; /* number of bits set in the snapshot == element count */
unsigned int bit; /* ffs() result for the current element: the node number printed */
unsigned long val; /* snapshot of the bitmask with already-visited bits cleared */
};
static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
{ {
struct proc_dir_entry *pde = s->private; struct proc_dir_entry *pde = s->private;
struct clusterip_config *c = pde->data; struct clusterip_config *c = pde->data;
unsigned int *nodeidx; unsigned int weight;
u_int32_t local_nodes;
read_lock_bh(&clusterip_lock); struct clusterip_seq_position *idx;
if (*pos >= c->num_local_nodes)
/* FIXME: possible race */
local_nodes = c->local_nodes;
weight = hweight32(local_nodes);
if (*pos >= weight)
return NULL; return NULL;
nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
if (!nodeidx) if (!idx)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
*nodeidx = *pos; idx->pos = *pos;
return nodeidx; idx->weight = weight;
idx->bit = ffs(local_nodes);
idx->val = local_nodes;
clear_bit(idx->bit - 1, &idx->val);
return idx;
} }
static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
{ {
struct proc_dir_entry *pde = s->private; struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
struct clusterip_config *c = pde->data;
unsigned int *nodeidx = (unsigned int *)v;
*pos = ++(*nodeidx); *pos = ++idx->pos;
if (*pos >= c->num_local_nodes) { if (*pos >= idx->weight) {
kfree(v); kfree(v);
return NULL; return NULL;
} }
return nodeidx; idx->bit = ffs(idx->val);
clear_bit(idx->bit - 1, &idx->val);
return idx;
} }
static void clusterip_seq_stop(struct seq_file *s, void *v) static void clusterip_seq_stop(struct seq_file *s, void *v)
{ {
kfree(v); kfree(v);
read_unlock_bh(&clusterip_lock);
} }
static int clusterip_seq_show(struct seq_file *s, void *v) static int clusterip_seq_show(struct seq_file *s, void *v)
{ {
struct proc_dir_entry *pde = s->private; struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
struct clusterip_config *c = pde->data;
unsigned int *nodeidx = (unsigned int *)v;
if (*nodeidx != 0) if (idx->pos != 0)
seq_putc(s, ','); seq_putc(s, ',');
seq_printf(s, "%u", c->local_nodes[*nodeidx]);
if (*nodeidx == c->num_local_nodes-1) seq_printf(s, "%u", idx->bit);
if (idx->pos == idx->weight - 1)
seq_putc(s, '\n'); seq_putc(s, '\n');
return 0; return 0;
......
...@@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag ...@@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
if (err < 0) if (err < 0)
goto out_freeiov; goto out_freeiov;
} }
err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
COMPAT_FLAGS(msg));
if (err) if (err)
goto out_freeiov; goto out_freeiov;
if (MSG_CMSG_COMPAT & flags) if (MSG_CMSG_COMPAT & flags)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment