Linux 2.1.100pre1

pre-100 (on ftp.kernel.org now) moves the dcache shrinking into the regular memory de-allocation loop, and while the exact shrinking speed is probably completely off, it should be able to react much better to small-memory machines than the hardcoded shrink did. Also, for those who appear to still have SMP interrupt stability problems, Ingo pointed out that we may have problems with PCI level-triggered interrupts. Could those people please test an additional small patch that involves moving the "ack_APIC_irq();" inside arch/i386/kernel/irq.c: do_ioapic_IRQ() from the top of the function to the very bottom of that function (that will move it to outside the irq controller lock, but it should actually be perfectly ok in this case). Linus

Linux 2.1.100pre1
pre-100 (on ftp.kernel.org now) moves the dcache shrinking into the regular memory de-allocation loop, and while the exact shrinking speed is probably completely off, it should be able to react much better to small-memory machines than the hardcoded shrink did. Also, for those who appear to still have SMP interrupt stability problems, Ingo pointed out that we may have problems with PCI level-triggered interrupts. Could those people please test an additional small patch that involves moving the "ack_APIC_irq();" inside arch/i386/kernel/irq.c: do_ioapic_IRQ() from the top of the function to the very bottom of that function (that will move it to outside the irq controller lock, but it should actually be perfectly ok in this case). Linus
5892de9e · Linus Torvalds · 686bb5a7 · 5892de9e · 5892de9e · 5892de9e
Commit 5892de9e authored Nov 23, 2007 by Linus Torvalds
32 changed files
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -10,7 +10,6 @@ pages of code to determine what is expected when writing a filesystem.
 Hopefully this helps anyone attempting such a feat, as well as clearing up
 a few important points/dependencies.

-
 register_filesystem (struct file_system_type *fstype)
 =====================================================

@@ -133,10 +132,12 @@ struct inode_operations

 	int (*follow_link) (struct inode *,struct inode *,int,int,struct inode **);
 	[optional]
-		The follow_link function is only necessary if a filesystem uses a really
-		twisted form of symbolic links - namely if the symbolic link comes from a
-		foreign filesystem that makes no sense....
-		I threw this one out - too much redundant code!
+		follow_link must be implemented if readlink is implemented.
+		Note that follow_link can return a different inode than a
+		lookup_dentry() on the result of readlink() would return.
+		The proc filesystem, in particular, uses this feature heavily.
+		For most user filesystems, however, follow_link() and readlink()
+		should return consistent results.

 	int (*readpage) (struct inode *, struct page *);	[optional]
 	int (*writepage) (struct inode *, struct page *);	[mandatory with readpage]

--- a/Makefile
+++ b/Makefile
 VERSION = 2
 PATCHLEVEL = 1
-SUBLEVEL = 99
+SUBLEVEL = 100

 ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/)


--- a/drivers/net/sdla_fr.c
+++ b/drivers/net/sdla_fr.c
@@ -831,7 +831,7 @@ static int if_header(struct sk_buff *skb, struct device *dev,
 {
 	int hdr_len = 0;
 	skb->protocol = type;
-	hdr_len = wan_encapsulate(skb, dev);
+	hdr_len = wanrouter_encapsulate(skb, dev);
 	if (hdr_len < 0) 
 	{
 		hdr_len = 0;
@@ -1486,7 +1486,7 @@ static void fr502_rx_intr(sdla_t * card)
 		/* Decapsulate packet and pass it up the protocol stack */
 		skb->dev = dev;
 		buf = skb_pull(skb, 1);		/* remove hardware header */
-		if (!wan_type_trans(skb, dev)) 
+		if (!wanrouter_type_trans(skb, dev)) 
 		{
 			/* can't decapsulate packet */
 			dev_kfree_skb(skb);
@@ -1601,7 +1601,7 @@ static void fr508_rx_intr(sdla_t * card)
 					skb->dev = dev;
 					/* remove hardware header */
 					buf = skb_pull(skb, 1);
-					if (!wan_type_trans(skb, dev)) 
+					if (!wanrouter_type_trans(skb, dev)) 
 					{
 						/* can't decapsulate packet */
 						dev_kfree_skb(skb);
@@ -2746,7 +2746,7 @@ static int process_udp_mgmt_pkt(char udp_pkt_src, sdla_t * card, struct sk_buff
 			   stack */
 			new_skb->dev = dev;
 			buf = skb_pull(new_skb, 1);	/* remove hardware header */
-			if (!wan_type_trans(new_skb, dev)) 
+			if (!wanrouter_type_trans(new_skb, dev)) 
 			{
 				++chan->UDP_FPIPE_mgmt_not_passed_to_stack;
 				/* can't decapsulate packet */
@@ -2944,7 +2944,7 @@ static int process_udp_driver_call(char udp_pkt_src, sdla_t * card, struct sk_bu
 			new_skb->dev = dev;
 			/* remove hardware header */
 			buf = skb_pull(new_skb, 1);
-			if (!wan_type_trans(new_skb, dev)) 
+			if (!wanrouter_type_trans(new_skb, dev)) 
 			{
 				/* can't decapsulate packet */
 				++chan->UDP_DRVSTATS_mgmt_not_passed_to_stack;

--- a/drivers/net/sdla_x25.c
+++ b/drivers/net/sdla_x25.c
@@ -649,7 +649,7 @@ static int if_header (struct sk_buff* skb, struct device* dev,
 	skb->protocol = type;
 	if (!chan->protocol)
 	{
-		hdr_len = wan_encapsulate(skb, dev);
+		hdr_len = wanrouter_encapsulate(skb, dev);
 		if (hdr_len < 0)
 		{
 			hdr_len = 0;
@@ -999,7 +999,7 @@ static void rx_intr (sdla_t* card)
 	chan->rx_skb = NULL;		/* dequeue packet */

 	/* Decapsulate packet, if necessary */
-	if (!skb->protocol && !wan_type_trans(skb, dev))
+	if (!skb->protocol && !wanrouter_type_trans(skb, dev))
 	{
 		/* can't decapsulate packet */
 		dev_kfree_skb(skb);

--- a/drivers/net/sdlamain.c
+++ b/drivers/net/sdlamain.c
@@ -152,7 +152,7 @@ int init_module (void)
 		wandev->setup    = &setup;
 		wandev->shutdown = &shutdown;
 		wandev->ioctl    = &ioctl;
-		err = register_wandev(wandev);
+		err = register_wan_device(wandev);
 		if (err)
 		{
 			printk(KERN_ERR
@@ -179,7 +179,7 @@ void cleanup_module (void)
 	for (i = 0; i < ncards; ++i)
 	{
 		sdla_t* card = &card_array[i];
-		unregister_wandev(card->devname);
+		unregister_wan_device(card->devname);
 	}
 	kfree(card_array);
 }

--- a/drivers/net/smc-mca.c
+++ b/drivers/net/smc-mca.c
@@ -337,7 +337,7 @@ int init_module(void)
 		dev->name = namelist+(NAMELEN*this_dev);
 		dev->irq = irq[this_dev];
 		dev->base_addr = io[this_dev];
-		dev->init = ultra_probe;
+		dev->init = ultramca_probe;
 		if (io[this_dev] == 0)
 		{
 			if (this_dev != 0)

--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -17,8 +17,13 @@
 #include "hosts.h"
 #include <scsi/scsi_ioctl.h>

-#define MAX_RETRIES 5   
-#define MAX_TIMEOUT (9 * HZ)
+#define NORMAL_RETRIES 5   
+#define NORMAL_TIMEOUT (10 * HZ)
+#define FORMAT_UNIT_TIMEOUT (2 * 60 * 60 * HZ)
+#define START_STOP_TIMEOUT (60 * HZ)
+#define MOVE_MEDIUM_TIMEOUT (5 * 60 * HZ)
+#define READ_ELEMENT_STATUS_TIMEOUT (5 * 60 * HZ)
+
 #define MAX_BUF PAGE_SIZE

 #define max(a,b) (((a) > (b)) ? (a) : (b))
@@ -61,7 +66,7 @@ static int ioctl_probe(struct Scsi_Host * host, void *buffer)
 /*
 * 
 * The SCSI_IOCTL_SEND_COMMAND ioctl sends a command out to the SCSI host.
- * The MAX_TIMEOUT and MAX_RETRIES  variables are used.  
+ * The NORMAL_TIMEOUT and NORMAL_RETRIES  variables are used.  
 * 
 * dev is the SCSI device struct ptr, *(int *) arg is the length of the
 * input data, if any, not including the command string & counts, 
@@ -94,7 +99,8 @@ static void scsi_ioctl_done (Scsi_Cmnd * SCpnt)
    }
 }   

-static int ioctl_internal_command(Scsi_Device *dev, char * cmd)
+static int ioctl_internal_command(Scsi_Device *dev, char * cmd,
+				  int timeout, int retries)
 {
    unsigned long flags;
    int result;
@@ -107,9 +113,7 @@ static int ioctl_internal_command(Scsi_Device *dev, char * cmd)
 	struct semaphore sem = MUTEX_LOCKED;
 	SCpnt->request.sem = &sem;
 	spin_lock_irqsave(&io_request_lock, flags);
-	scsi_do_cmd(SCpnt,  cmd, NULL,  0,
-		    scsi_ioctl_done,  MAX_TIMEOUT,
-		    MAX_RETRIES);
+	scsi_do_cmd(SCpnt,  cmd, NULL,  0, scsi_ioctl_done,  timeout, retries);
 	spin_unlock_irqrestore(&io_request_lock, flags);
 	down(&sem);
        SCpnt->request.sem = NULL;
@@ -250,21 +254,24 @@ int scsi_ioctl_send_command(Scsi_Device *dev, Scsi_Ioctl_Command *sic)
    switch (opcode)
      {
      case FORMAT_UNIT:
-	timeout =  2 * 60 * 60 * HZ; /* 2 Hours */
+ 	timeout = FORMAT_UNIT_TIMEOUT;
 	retries = 1;
 	break;
      case START_STOP:
-	timeout =  2 * 60 * HZ;	/* 2 minutes */
-	retries = 1;
+ 	timeout = START_STOP_TIMEOUT;
+ 	retries = NORMAL_RETRIES;
 	break;
      case MOVE_MEDIUM:
+ 	timeout = MOVE_MEDIUM_TIMEOUT;
+ 	retries = NORMAL_RETRIES;
+ 	break;
      case READ_ELEMENT_STATUS:
-	timeout =  5 * 60 * HZ;	/* 5 minutes */
-	retries = 1;
+ 	timeout = READ_ELEMENT_STATUS_TIMEOUT;
+ 	retries = NORMAL_RETRIES;
 	break;
      default:
-	timeout = MAX_TIMEOUT;
-	retries = MAX_RETRIES;
+ 	timeout = NORMAL_TIMEOUT;
+ 	retries = NORMAL_RETRIES;
 	break;
      }

@@ -395,7 +402,8 @@ int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg)
 	scsi_cmd[1] = dev->lun << 5;
 	scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
 	scsi_cmd[4] = SCSI_REMOVAL_PREVENT;
-	return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd);
+	return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+				      NORMAL_TIMEOUT, NORMAL_RETRIES);
 	break;
    case SCSI_IOCTL_DOORUNLOCK:
 	if (!dev->removable || !dev->lockable) return 0;
@@ -403,13 +411,31 @@ int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg)
 	scsi_cmd[1] = dev->lun << 5;
 	scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
 	scsi_cmd[4] = SCSI_REMOVAL_ALLOW;
-	return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd);
+	return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+				      NORMAL_TIMEOUT, NORMAL_RETRIES);
    case SCSI_IOCTL_TEST_UNIT_READY:
 	scsi_cmd[0] = TEST_UNIT_READY;
 	scsi_cmd[1] = dev->lun << 5;
 	scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
 	scsi_cmd[4] = 0;
-	return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd);
+	return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+				      NORMAL_TIMEOUT, NORMAL_RETRIES);
+	break;
+    case SCSI_IOCTL_START_UNIT:
+	scsi_cmd[0] = START_STOP;
+	scsi_cmd[1] = dev->lun << 5;
+	scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+	scsi_cmd[4] = 1;
+	return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+				      START_STOP_TIMEOUT, NORMAL_RETRIES);
+	break;
+    case SCSI_IOCTL_STOP_UNIT:
+	scsi_cmd[0] = START_STOP;
+	scsi_cmd[1] = dev->lun << 5;
+	scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+	scsi_cmd[4] = 0;
+	return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+				      START_STOP_TIMEOUT, NORMAL_RETRIES);
 	break;
    default :           
 	if (dev->host->hostt->ioctl)

--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1060,7 +1060,14 @@ static int check_scsidisk_media_change(kdev_t full_dev){
    }

    inode.i_rdev = full_dev;  /* This is all we really need here */
-    retval = sd_ioctl(&inode, NULL, SCSI_IOCTL_TEST_UNIT_READY, 0);
+
+    /* Using Start/Stop enables differentiation between drive with
+     * no cartridge loaded - NOT READY, drive with changed cartridge -
+     * UNIT ATTENTION, or with same cartridge - GOOD STATUS.
+     * This also handles drives that auto spin down. eg iomega jaz 1GB
+     * as this will spin up the drive.
+     */
+    retval = sd_ioctl(&inode, NULL, SCSI_IOCTL_START_UNIT, 0);
    
    if(retval){ /* Unable to test, unit probably not ready.  This usually
 		 * means there is no disc in the drive.  Mark as changed,

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -430,12 +430,17 @@ void shrink_dcache_parent(struct dentry * parent)
 * more memory, but aren't really sure how much. So we
 * carefully try to free a _bit_ of our dcache, but not
 * too much.
+ *
+ * Priority:
+ *   0 - very urgent: shrink everything
+ *  ...
+ *   6 - base-level: try to shrink a bit.
 */
-void shrink_dcache_memory(void)
+void shrink_dcache_memory(int priority, unsigned int gfp_mask)
 {
 	int count = select_dcache(32, 8);
 	if (count)
-		prune_dcache(count);
+		prune_dcache((count << 6) >> priority);
 }

 #define NAME_ALLOC_LEN(len)	((len+16) & ~15)

--- a/fs/namei.c
+++ b/fs/namei.c
@@ -43,17 +43,24 @@
 *
 * The new code replaces the old recursive symlink resolution with
 * an iterative one (in case of non-nested symlink chains).  It does
- * this by looking up the symlink name from the particular filesystem,
- * and then follows this name as if it were a user-supplied one.  This
- * is done solely in the VFS level, such that <fs>_follow_link() is not
- * used any more and could be removed in future.  As a side effect,
- * dir_namei(), _namei() and follow_link() are now replaced with a single
- * function lookup_dentry() that can handle all the special cases of the former
- * code.
+ * this with calls to <fs>_follow_link().
+ * As a side effect, dir_namei(), _namei() and follow_link() are now 
+ * replaced with a single function lookup_dentry() that can handle all 
+ * the special cases of the former code.
 *
 * With the new dcache, the pathname is stored at each inode, at least as
 * long as the refcount of the inode is positive.  As a side effect, the
 * size of the dcache depends on the inode cache and thus is dynamic.
+ *
+ * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
+ * resolution to correspond with current state of the code.
+ *
+ * Note that the symlink resolution is not *completely* iterative.
+ * There is still a significant amount of tail- and mid- recursion in
+ * the algorithm.  Also, note that <fs>_readlink() is not used in
+ * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
+ * may return different results than <fs>_follow_link().  Many virtual
+ * filesystems (including /proc) exhibit this behavior.
 */

 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:

--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -138,9 +138,12 @@ extern void				icmpv6_send(struct sk_buff *skb,
 						    __u32 info, 
 						    struct device *dev);

-extern void				icmpv6_init(struct net_proto_family *ops);
+extern int				icmpv6_init(struct net_proto_family *ops);
 extern int				icmpv6_err_convert(int type, int code,
 							   int *err);
+extern void				icmpv6_cleanup(void);
+extern void				icmpv6_param_prob(struct sk_buff *skb,
+							  int code, void *pos);
 #endif

 #endif
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -42,6 +42,24 @@ struct ipv6_rt_hdr {
 	 */
 };

+
+struct ipv6_opt_hdr {
+	__u8 		nexthdr;
+	__u8 		hdrlen;
+	/* 
+	 * TLV encoded option data follows.
+	 */
+};
+
+#define ipv6_destopt_hdr ipv6_opt_hdr
+#define ipv6_hopopt_hdr  ipv6_opt_hdr
+
+#ifdef __KERNEL__
+#define ipv6_optlen(p)  (((p)->hdrlen+1) << 3)
+#endif
+
+
+
 /*
 *	routing header type 0 (used in cmsghdr struct)
 */
@@ -95,13 +113,13 @@ struct ipv6_options

 	/* 
 	 * protocol options 
-	 * usualy carried in IPv6 extension headers
+	 * usually carried in IPv6 extension headers
 	 */

 	struct ipv6_rt_hdr		*srcrt;	/* Routing Header */
-
 };

+
 #endif

 #endif
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -4,7 +4,7 @@
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
- *	$Id: ipv6.h,v 1.9 1998/03/08 05:55:20 davem Exp $
+ *	$Id: ipv6.h,v 1.10 1998/04/30 16:24:14 freitag Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
@@ -108,6 +108,8 @@ struct frag_queue {
 	struct frag_queue	*prev;

 	__u32			id;		/* fragment id		*/
+	struct in6_addr		saddr;
+	struct in6_addr		daddr;
 	struct timer_list	timer;		/* expire timer		*/
 	struct ipv6_frag	*fragments;
 	struct device		*dev;
@@ -248,6 +250,10 @@ extern int			ipv6opt_srt_tosin(struct ipv6_options *opt,

 extern void			ipv6opt_free(struct ipv6_options *opt);

+extern struct ipv6_opt_hdr *	ipv6_skip_exthdr(struct ipv6_opt_hdr *hdr, 
+					         u8 *nexthdrp, int len);
+
+

 /*
 *	socket options (ipv6_sockglue.c)

--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -101,6 +101,8 @@ extern int			igmp6_event_report(struct sk_buff *skb,
 						   struct icmp6hdr *hdr,
 						   int len);

+extern void			igmp6_cleanup(void);
+
 extern __inline__ struct neighbour * ndisc_get_neigh(struct device *dev, struct in6_addr *addr)
 {


--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -642,7 +642,7 @@ static __inline__ unsigned int tcp_current_mss(struct sock *sk)

 		/* PMTU discovery event has occurred. */
 		sk->mtu = dst->pmtu;
-		sk->mss = sk->mtu - mss_distance;
+		mss_now = sk->mss = sk->mtu - mss_distance;
 	}

 	if(tp->sack_ok && tp->num_sacks)

--- a/include/scsi/scsi_ioctl.h
+++ b/include/scsi/scsi_ioctl.h
@@ -5,6 +5,8 @@
 #define SCSI_IOCTL_TEST_UNIT_READY 2
 #define SCSI_IOCTL_BENCHMARK_COMMAND 3
 #define SCSI_IOCTL_SYNC 4			/* Request synchronous parameters */
+#define SCSI_IOCTL_START_UNIT 5
+#define SCSI_IOCTL_STOP_UNIT 6
 /* The door lock/unlock constants are compatible with Sun constants for
   the cdrom */
 #define SCSI_IOCTL_DOORLOCK 0x5380		/* lock the eject mechanism */

--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -455,17 +455,20 @@ static inline int do_try_to_free_page(int gfp_mask)
 	switch (state) {
 		do {
 		case 0:
-			state = 1;
 			if (shrink_mmap(i, gfp_mask))
 				return 1;
+			state = 1;
 		case 1:
-			state = 2;
 			if ((gfp_mask & __GFP_IO) && shm_swap(i, gfp_mask))
 				return 1;
-		default:
-			state = 0;
+			state = 2;
+		case 2:
 			if (swap_out(i, gfp_mask))
 				return 1;
+			state = 3;
+		case 3:
+			shrink_dcache_memory(i, gfp_mask);
+			state = 0;
 		i--;
 		} while ((i - stop) >= 0);
 	}
@@ -545,9 +548,6 @@ int kswapd(void *unused)
 		schedule();
 		swapstats.wakeups++;

-		/* This will gently shrink the dcache.. */
-		shrink_dcache_memory();
-	
 		/*
 		 * Do the background pageout: be
 		 * more aggressive if we're really

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1764,6 +1764,9 @@ extern int baycom_init(void);
 extern int lapbeth_init(void);
 extern void arcnet_init(void);
 extern void ip_auto_config(void);
+#ifdef CONFIG_8xx
+extern int cpm_enet_init(void);
+#endif /* CONFIG_8xx */

 #ifdef CONFIG_PROC_FS
 static struct proc_dir_entry proc_net_dev = {
@@ -1845,6 +1848,9 @@ __initfunc(int net_dev_init(void))
 #endif
 #if defined(CONFIG_ARCNET)
 	arcnet_init();
+#endif
+#if defined(CONFIG_8xx)
+        cpm_enet_init();
 #endif
 	/*
 	 *	SLHC if present needs attaching so other people see it

--- a/net/ethernet/pe2.c
+++ b/net/ethernet/pe2.c
@@ -11,6 +11,7 @@ pEII_datalink_header(struct datalink_proto *dl,
 	struct device	*dev = skb->dev;

 	skb->protocol = htons (ETH_P_IPX);
+	if(dev->hard_header)
 		dev->hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len);
 }


--- a/net/ipv4/ip_fw.c
+++ b/net/ipv4/ip_fw.c
@@ -6,7 +6,7 @@
 *	license in recognition of the original copyright. 
 *				-- Alan Cox.
 *
- *	$Id: ip_fw.c,v 1.34 1998/03/20 09:12:06 davem Exp $
+ *	$Id: ip_fw.c,v 1.35 1998/04/30 16:29:51 freitag Exp $
 *
 *	Ported from BSD to Linux,
 *		Alan Cox 22/Nov/1994.
@@ -62,6 +62,7 @@
 *	Wouter Gadeyne		:	Fixed masquerading support of ftp PORT commands
 *
 *	Juan Jose Ciarlante	:	Masquerading code moved to ip_masq.c
+ *	Andi Kleen :		Print frag_offsets and the ip flags properly.
 *
 *	All the real work was done by .....
 *
@@ -202,6 +203,90 @@ extern inline int port_match(unsigned short *portptr,int nports,unsigned short p

 #if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL)

+#ifdef CONFIG_IP_FIREWALL_VERBOSE
+
+/* 
+ *	VERY ugly piece of code which actually makes kernel printf for
+ * 	matching packets. 
+ */
+
+static char *chain_name(struct ip_fw *chain, int mode)
+{
+	switch (mode) { 
+	case IP_FW_MODE_ACCT_IN: return "acct in";
+	case IP_FW_MODE_ACCT_OUT: return "acct out";
+	default:
+		if (chain == ip_fw_fwd_chain) 
+			return "fw-fwd";
+		else if (chain == ip_fw_in_chain)
+			return "fw-in";
+		else
+			return "fw-out"; 
+	}
+}
+
+static char *rule_name(struct ip_fw *f, int mode, char *buf)
+{
+	if (mode == IP_FW_MODE_ACCT_IN || mode == IP_FW_MODE_ACCT_OUT)
+		return "";
+
+	if(f->fw_flg&IP_FW_F_ACCEPT) {
+		if(f->fw_flg&IP_FW_F_REDIR) {
+			sprintf(buf, "acc/r%d ", f->fw_pts[f->fw_nsp+f->fw_ndp]);
+			return buf;
+		} else if(f->fw_flg&IP_FW_F_MASQ)
+			return "acc/masq ";
+		else
+			return "acc ";
+	} else if(f->fw_flg&IP_FW_F_ICMPRPL) {
+		return "rej ";
+	} else {
+		return "deny ";
+	}
+}
+
+static void print_packet(struct iphdr *ip, 
+			 u16 src_port, u16 dst_port, u16 icmp_type,
+			 char *chain, char *rule, char *devname)
+{
+	__u32 *opt = (__u32 *) (ip + 1);
+	int opti;
+	__u16 foff = ntohs(ip->frag_off); 
+
+	printk(KERN_INFO "IP %s %s%s", chain, rule, devname); 
+
+	switch(ip->protocol)
+	{
+	case IPPROTO_TCP:
+		printk(" TCP ");
+		break;
+	case IPPROTO_UDP:
+		printk(" UDP ");
+		break;
+	case IPPROTO_ICMP:
+		printk(" ICMP/%d ", icmp_type);
+		break;
+	default:
+		printk(" PROTO=%d ", ip->protocol);
+		break;
+	}
+	print_ip(ip->saddr);
+	if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP)
+		printk(":%hu", src_port);
+	printk(" ");
+	print_ip(ip->daddr);
+	if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP)
+		printk(":%hu", dst_port);
+	printk(" L=%hu S=0x%2.2hX I=%hu FO=0x%4.4hX T=%hu",
+	       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
+	       foff & IP_OFFSET, ip->ttl); 
+	if (foff & IP_DF) printk(" DF=1");
+	if (foff & IP_MF) printk(" MF=1"); 
+	for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
+		printk(" O=0x%8.8X", *opt++);
+	printk("\n");	
+}
+#endif

 /*
 *	Returns one of the generic firewall policies, like FW_ACCEPT.
@@ -483,68 +568,14 @@ int ip_fw_chk(struct iphdr *ip, struct device *rif, __u16 *redirport, struct ip_
 		}

 #ifdef CONFIG_IP_FIREWALL_VERBOSE
-		/*
-		 * VERY ugly piece of code which actually
-		 * makes kernel printf for matching packets...
-		 */
-
 		if (f->fw_flg & IP_FW_F_PRN)
 		{
-			__u32 *opt = (__u32 *) (ip + 1);
-			int opti;
+			char buf[16]; 

-			if(mode == IP_FW_MODE_ACCT_IN)
-				printk(KERN_INFO "IP acct in ");
-			else if(mode == IP_FW_MODE_ACCT_OUT)
-				printk(KERN_INFO "IP acct out ");
-			else {
-				if(chain == ip_fw_fwd_chain)
-					printk(KERN_INFO "IP fw-fwd ");
-				else if(chain == ip_fw_in_chain)
-					printk(KERN_INFO "IP fw-in ");
-				else
-					printk(KERN_INFO "IP fw-out ");
-				if(f->fw_flg&IP_FW_F_ACCEPT) {
-					if(f->fw_flg&IP_FW_F_REDIR)
-						printk("acc/r%d ", f->fw_pts[f->fw_nsp+f->fw_ndp]);
-					else if(f->fw_flg&IP_FW_F_MASQ)
-						printk("acc/masq ");
-					else
-						printk("acc ");
-				} else if(f->fw_flg&IP_FW_F_ICMPRPL)
-					printk("rej ");
-				else
-					printk("deny ");
-			}
-			printk(rif ? rif->name : "-");
-			switch(ip->protocol)
-			{
-				case IPPROTO_TCP:
-					printk(" TCP ");
-					break;
-				case IPPROTO_UDP:
-					printk(" UDP ");
-					break;
-				case IPPROTO_ICMP:
-					printk(" ICMP/%d ", icmp_type);
-					break;
-				default:
-					printk(" PROTO=%d ", ip->protocol);
-					break;
-			}
-			print_ip(ip->saddr);
-			if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP)
-				printk(":%hu", src_port);
-			printk(" ");
-			print_ip(ip->daddr);
-			if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP)
-				printk(":%hu", dst_port);
-			printk(" L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
-				ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
-				ip->frag_off, ip->ttl);
-			for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
-				printk(" O=0x%8.8X", *opt++);
-			printk("\n");
+			print_packet(ip, src_port, dst_port, icmp_type,
+				     chain_name(chain, mode), 
+				     rule_name(f, mode, buf), 
+				     rif ? rif->name : "-");
 		}
 #endif		
 		if (mode != IP_FW_MODE_CHK) {

--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
- * Version:	$Id: tcp_input.c,v 1.114 1998/04/28 06:42:22 davem Exp $
+ * Version:	$Id: tcp_input.c,v 1.116 1998/05/02 14:50:11 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -421,33 +421,6 @@ static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th,
 	return 1;
 }

-#if 0 /* Not working yet... -DaveM */
-static void tcp_compute_tsack(struct sock *sk, struct tcp_opt *tp)
-{
-	struct sk_buff *skb = skb_peek(&sk->write_queue);
-	__u32 tstamp = tp->rcv_tsecr;
-	int fack_count = 0;
-
-	while((skb != NULL) &&
-	      (skb != tp->send_head) &&
-	      (skb != (struct sk_buff *)&sk->write_queue)) {
-		if(TCP_SKB_CB(skb)->when == tstamp) {
-			__u8 sacked = TCP_SKB_CB(skb)->sacked;
-
-			sacked |= TCPCB_SACKED_ACKED;
-			if(sacked & TCPCB_SACKED_RETRANS)
-				tp->retrans_out--;
-			TCP_SKB_CB(skb)->sacked = sacked;
-		}
-		if(!before(TCP_SKB_CB(skb)->when, tstamp))
-			fack_count++;
-		skb = skb->next;
-	}
-	if(fack_count > tp->fackets_out)
-		tp->fackets_out = fack_count;
-}
-#endif
-
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
 #define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
@@ -481,13 +454,6 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
 	if (ack == tp->snd_una && tp->packets_out && (not_dup == 0)) {
 		/* This is the standard reno style fast retransmit branch. */

-#if 0	/* Not working yet... -DaveM */
-		/* If not doing SACK, but doing timestamps, compute timestamp
-		 * based pseudo-SACKs when we see duplicate ACKs.
-		 */
-		if(!tp->sack_ok && tp->saw_tstamp)
-			tcp_compute_tsack(sk, tp);
-#endif
                /* 1. When the third duplicate ack is received, set ssthresh 
                 * to one half the current congestion window, but no less 
                 * than two segments. Retransmit the missing segment.
@@ -611,6 +577,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,

 	while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 
+		__u8 sacked = scb->sacked;
 		
 		/* If our packet is before the ack sequence we can
 		 * discard it as it's confirmed to have arrived at
@@ -626,22 +593,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
 		 * connection startup slow start one packet too
 		 * quickly.  This is severely frowned upon behavior.
 		 */
+		if(sacked & TCPCB_SACKED_RETRANS && tp->retrans_out)
+			tp->retrans_out--;
 		if(!(scb->flags & TCPCB_FLAG_SYN)) {
-			__u8 sacked = scb->sacked;
-
 			acked |= FLAG_DATA_ACKED;
-			if(sacked & TCPCB_SACKED_RETRANS) {
+			if(sacked & TCPCB_SACKED_RETRANS)
 				acked |= FLAG_RETRANS_DATA_ACKED;
-
-				/* XXX The race is, fast retrans frame -->
-				 * XXX retrans timeout sends older frame -->
-				 * XXX ACK arrives for fast retrans frame -->
-				 * XXX retrans_out goes negative --> splat.
-				 * XXX Please help me find a better way -DaveM
-				 */
-				if(tp->retrans_out)
-					tp->retrans_out--;
-			}
 			if(tp->fackets_out)
 				tp->fackets_out--;
 		} else {

--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
- * Version:	$Id: tcp_ipv4.c,v 1.142 1998/04/30 12:00:45 davem Exp $
+ * Version:	$Id: tcp_ipv4.c,v 1.145 1998/05/02 12:47:13 davem Exp $
 *
 *		IPv4 specific functions
 *
@@ -48,7 +48,6 @@

 #include <linux/config.h>
 #include <linux/types.h>
-#include <linux/stddef.h>
 #include <linux/fcntl.h>
 #include <linux/random.h>
 #include <linux/init.h>
@@ -61,12 +60,15 @@
 #include <asm/segment.h>

 #include <linux/inet.h>
+#include <linux/stddef.h>

 extern int sysctl_tcp_timestamps;
 extern int sysctl_tcp_window_scaling;
 extern int sysctl_tcp_sack;
 extern int sysctl_tcp_syncookies;
 extern int sysctl_ip_dynaddr;
+extern __u32 sysctl_wmem_max;
+extern __u32 sysctl_rmem_max;

 /* Check TCP sequence numbers in ICMP packets. */
 #define ICMP_MIN_LENGTH 8
@@ -166,17 +168,21 @@ struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum)
 	return tb;
 }

+#ifdef CONFIG_IP_TRANSPARENT_PROXY
 /* Ensure that the bound bucket for the port exists.
 * Return 0 on success.
 */
 static __inline__ int tcp_bucket_check(unsigned short snum)
 {
-	if (tcp_bound_hash[tcp_bhashfn(snum)] == NULL &&
-			tcp_bucket_create(snum) == NULL)
+	struct tcp_bind_bucket *tb = tcp_bound_hash[tcp_bhashfn(snum)];
+	for( ; (tb && (tb->port != snum)); tb = tb->next)
+		;
+	if(tb == NULL && tcp_bucket_create(snum) == NULL)
 		return 1;
 	else
 		return 0;
 }
+#endif

 static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum)
 {
@@ -215,10 +221,21 @@ static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum)
 				result = 1;
 		}
 	}
-	if((result == 0) &&
-	   (tb == NULL) &&
-	   (tcp_bucket_create(snum) == NULL))
+	if(result == 0) {
+		if(tb == NULL) {
+			if(tcp_bucket_create(snum) == NULL)
 				result = 1;
+		} else {
+			/* It could be pending garbage collection, this
+			 * kills the race and prevents it from disappearing
+			 * out from under us by the time we use it.  -DaveM
+			 */
+			if(tb->owners == NULL && !(tb->flags & TCPB_FLAG_LOCKED)) {
+				tb->flags = TCPB_FLAG_LOCKED;
+				tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
+			}
+		}
+	}
 go_like_smoke:
 	SOCKHASH_UNLOCK();
 	return result;
@@ -1308,6 +1325,11 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	if (!newsk) 
 		goto exit;

+	if (newsk->rcvbuf < (3 * newsk->mtu))
+		newsk->rcvbuf = min ((3 * newsk->mtu), sysctl_rmem_max);
+	if (newsk->sndbuf < (3 * newsk->mtu))
+		newsk->sndbuf = min ((3 * newsk->mtu), sysctl_wmem_max);
+ 
 	sk->tp_pinfo.af_tcp.syn_backlog--;
 	sk->ack_backlog++;


--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,7 +5,7 @@
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
- * Version:	$Id: tcp_timer.c,v 1.50 1998/04/14 09:08:59 davem Exp $
+ * Version:	$Id: tcp_timer.c,v 1.51 1998/05/02 15:19:26 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -448,26 +448,24 @@ void tcp_retransmit_timer(unsigned long data)
 	 */
 	if(tp->sack_ok) {
 		struct sk_buff *skb = skb_peek(&sk->write_queue);
-		__u8 toclear = TCPCB_SACKED_ACKED;

-		if(tp->retransmits == 0)
-			toclear |= TCPCB_SACKED_RETRANS;
 		while((skb != NULL) &&
 		      (skb != tp->send_head) &&
 		      (skb != (struct sk_buff *)&sk->write_queue)) {
-			TCP_SKB_CB(skb)->sacked &= ~(toclear);
+			TCP_SKB_CB(skb)->sacked &=
+				~(TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS);
 			skb = skb->next;
 		}
-		tp->fackets_out = 0;
 	}

 	/* Retransmission. */
 	tp->retrans_head = NULL;
+	tp->fackets_out = 0;
+	tp->retrans_out = 0;
 	if (tp->retransmits == 0) {
 		/* remember window where we lost
 		 * "one half of the current window but at least 2 segments"
 		 */
-		tp->retrans_out = 0;
 		tp->snd_ssthresh = max(tp->snd_cwnd >> (1 + TCP_CWND_SHIFT), 2);
 		tp->snd_cwnd = (1 << TCP_CWND_SHIFT);
 	}

--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -4,8 +4,9 @@
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
+ *	Andi Kleen		<ak@muc.de>
 *
- *	$Id: exthdrs.c,v 1.5 1998/02/12 07:43:39 davem Exp $
+ *	$Id: exthdrs.c,v 1.6 1998/04/30 16:24:20 freitag Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
@@ -34,6 +35,10 @@
 #include <net/ip6_route.h>
 #include <net/addrconf.h>

+#include <asm/uaccess.h>
+
+#define swap(a,b) do { typeof (a) tmp; tmp = (a); (a) = (b); (b) = (tmp); } while(0)
+
 /*
 *	inbound
 */
@@ -135,7 +140,7 @@ int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev,
 */

 int ipv6opt_bld_rthdr(struct sk_buff *skb, struct ipv6_options *opt,
-		      struct in6_addr *addr, int proto)		      
+		      struct in6_addr *addr)		      
 {
 	struct rt0_hdr *phdr, *ihdr;
 	int hops;
@@ -154,7 +159,75 @@ int ipv6opt_bld_rthdr(struct sk_buff *skb, struct ipv6_options *opt,
 	ipv6_addr_copy(phdr->addr + (hops - 1), addr);
 	
 	phdr->rt_hdr.nexthdr = proto; 
-
 	return NEXTHDR_ROUTING;
 }
 #endif
+
+/* 
+ * find out if nexthdr is an extension header or a protocol
+ */
+
+static __inline__ int ipv6_ext_hdr(u8 nexthdr)
+{
+	/* 
+	 * Non-zero for the types below (NONE and ESP included), else zero.
+	 */
+	return ( (nexthdr == NEXTHDR_HOP)	||
+		 (nexthdr == NEXTHDR_ROUTING)	||
+		 (nexthdr == NEXTHDR_FRAGMENT)	||
+		 (nexthdr == NEXTHDR_ESP)	||
+		 (nexthdr == NEXTHDR_AUTH)	||
+		 (nexthdr == NEXTHDR_NONE)	||
+		 (nexthdr == NEXTHDR_DEST) );
+		 
+}
+
+/*
+ * Skip any extension headers. This is used by the ICMP module.
+ *
+ * len is the number of bytes available at hdr.  Returns the first
+ * non-extension header with its type stored in *nexthdrp, or NULL on
+ * NEXTHDR_NONE, on ESP, or if a header does not fit into len.
+ *
+ * Note that strictly speaking this conflicts with RFC1883 4.0:
+ * ...The contents and semantics of each extension header determine whether
+ * or not to proceed to the next header.  Therefore, extension headers must
+ * be processed strictly in the order they appear in the packet; a
+ * receiver must not, for example, scan through a packet looking for a
+ * particular kind of extension header and process that header prior to
+ * processing all preceding ones.
+ *
+ * We do exactly this. This is a protocol bug. We can't decide after
+ * seeing an unknown discard-with-error flavour TLV option if it's an
+ * ICMP error message or not (errors should never be sent in reply to
+ * ICMP error messages).
+ *
+ * But I see no other way to do this. This might need to be reexamined
+ * when Linux implements ESP (and maybe AUTH) headers.
+ */
+struct ipv6_opt_hdr *ipv6_skip_exthdr(struct ipv6_opt_hdr *hdr, 
+				      u8 *nexthdrp, int len)
+{
+	u8 nexthdr = *nexthdrp;
+
+	while (ipv6_ext_hdr(nexthdr)) {
+		int hdrlen; 
+		/* Hack: give up on ESP here; after the loop it is dead code. */
+		if (nexthdr == NEXTHDR_NONE || nexthdr == NEXTHDR_ESP)
+			return NULL;
+		if (len < (int) sizeof(struct ipv6_opt_hdr)) /* signed: negative len fails */
+			return NULL;
+
+		hdrlen = ipv6_optlen(hdr); 
+		if (len < hdrlen)
+			return NULL; 
+
+		nexthdr = hdr->nexthdr;
+		hdr = (struct ipv6_opt_hdr *) ((u8*)hdr + hdrlen);
+		len -= hdrlen;
+	}
+
+	*nexthdrp = nexthdr;
+	return hdr;
+}
+
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -5,7 +5,7 @@
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
- *	$Id: icmp.c,v 1.15 1998/03/21 07:28:03 davem Exp $
+ *	$Id: icmp.c,v 1.17 1998/05/01 10:31:41 davem Exp $
 *
 *	Based on net/ipv4/icmp.c
 *
@@ -21,6 +21,8 @@
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
+ *	Andi Kleen			add rate limits. never reply to a icmp.
+ *					add more length checks and other fixes.
 */

 #define __NO_VERSION__
@@ -51,6 +53,7 @@
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#include <net/icmp.h>

 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -129,6 +132,62 @@ static int icmpv6_getfrag(const void *data, struct in6_addr *saddr,
 	return 0; 
 }

+
+/* 
+ * Convenience form of icmpv6_send(): ICMPV6_PARAMPROB at pos, then frees skb.
+ */
+void icmpv6_param_prob(struct sk_buff *skb, int code, void *pos)
+{
+	int offset = (u8*)pos - (u8*)skb->nh.ipv6h; 	/* pos relative to the IPv6 header */
+	
+	icmpv6_send(skb, ICMPV6_PARAMPROB, code, offset, skb->dev);
+	kfree_skb(skb);	/* skb is released here; callers must not free it again */
+}
+
+static inline int is_icmp(struct ipv6hdr *hdr, int len)
+{
+	__u8 nexthdr = hdr->nexthdr; 
+	len -= sizeof(*hdr);	/* len arrives counted from hdr; parsing starts behind it */
+	if (!ipv6_skip_exthdr((struct ipv6_opt_hdr *)(hdr+1), &nexthdr, len))
+		return 0; 
+	return nexthdr == IPPROTO_ICMPV6;	/* IPv6 carries ICMP as ICMPV6, not IPPROTO_ICMP */
+}
+
+int sysctl_icmpv6_time = 1*HZ; 
+
+/* 
+ * Check the ICMP output rate limit.  Limiting is compiled out: always 1.
+ */
+static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
+				     struct flowi *fl)
+{
+#if 0	/* locals used only by the disabled block below */
+	struct dst_entry *dst; 
+	int allow = 0;
+#endif
+	/* Informational messages (top bit of type set) are not limited. */
+	if (type & 0x80)
+		return 1; 
+
+#if 0 /* not yet, first fix routing COW */
+
+	/* 
+	 * Look up the output route.
+	 * XXX: perhaps the expire for routing entries cloned by
+	 * this lookup should be more aggressive (not longer than timeout).
+	 */
+	dst = ip6_route_output(sk, fl, 1);
+	if (dst->error) 
+		ipv6_statistics.Ip6OutNoRoutes++;
+	else 
+		allow = xrlim_allow(dst, sysctl_icmpv6_time);
+	dst_release(dst);
+	return allow;
+#else	/* limiter disabled: every message is allowed */
+	return 1;
+#endif
+}
+
 /*
 *	an inline helper for the "simple" if statement below
 *	checks if parameter problem report is caused by an
@@ -214,6 +273,24 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		return;
 	}

+	/* 
+	 *	Never answer to a ICMP packet.
+	 */
+	if (is_icmp(hdr, (u8*)skb->tail - (u8*)hdr)) {
+		printk(KERN_DEBUG "icmpv6_send: no reply to icmp\n"); 
+		return;
+	}
+
+	fl.proto = IPPROTO_ICMPV6;
+	fl.nl_u.ip6_u.daddr = &hdr->saddr;
+	fl.nl_u.ip6_u.saddr = saddr;
+	fl.oif = iif;
+	fl.uli_u.icmpt.type = type;
+	fl.uli_u.icmpt.code = code;
+
+	if (!icmpv6_xrlim_allow(sk, type, &fl)) 
+		return; 
+
 	/*
 	 *	ok. kick it. checksum will be provided by the 
 	 *	getfrag_t callback.
@@ -248,13 +325,6 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,

 	msg.len = len;

-	fl.proto = IPPROTO_ICMPV6;
-	fl.nl_u.ip6_u.daddr = &hdr->saddr;
-	fl.nl_u.ip6_u.saddr = saddr;
-	fl.oif = iif;
-	fl.uli_u.icmpt.type = type;
-	fl.uli_u.icmpt.code = code;
-
 	ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1,
 		       MSG_DONTWAIT);

@@ -312,21 +382,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	dst_release(xchg(&sk->dst_cache, NULL));
 }

-static __inline__ int ipv6_ext_hdr(u8 nexthdr)
-{
-	/* 
-	 * find out if nexthdr is an extension header or a protocol
-	 */
-	return ( (nexthdr == NEXTHDR_HOP)	||
-		 (nexthdr == NEXTHDR_ROUTING)	||
-		 (nexthdr == NEXTHDR_FRAGMENT)	||
-		 (nexthdr == NEXTHDR_ESP)	||
-		 (nexthdr == NEXTHDR_AUTH)	||
-		 (nexthdr == NEXTHDR_NONE)	||
-		 (nexthdr == NEXTHDR_DEST) );
-		 
-}
-
 static void icmpv6_notify(struct sk_buff *skb,
 			  int type, int code, unsigned char *buff, int len,
 			  struct in6_addr *saddr, struct in6_addr *daddr, 
@@ -335,40 +390,23 @@ static void icmpv6_notify(struct sk_buff *skb,
 	struct ipv6hdr *hdr = (struct ipv6hdr *) buff;
 	struct inet6_protocol *ipprot;
 	struct sock *sk;
-	char * pbuff;
+	struct ipv6_opt_hdr *pb;
 	__u32 info = 0;
 	int hash;
 	u8 nexthdr;

-	/* now skip over extension headers */
-
 	nexthdr = hdr->nexthdr;

-	pbuff = (char *) (hdr + 1);
+	pb = (struct ipv6_opt_hdr *) (hdr + 1);
 	len -= sizeof(struct ipv6hdr);
-
-	while (ipv6_ext_hdr(nexthdr)) {
-		int hdrlen;
-
-		if (nexthdr == NEXTHDR_NONE)
+	if (len < 0)
 		return;

-		nexthdr = *pbuff;
-
-		/* Header length is size in 8-octet units, not
-		 * including the first 8 octets.
-		 */
-		hdrlen = *(pbuff+1);
-		hdrlen = (hdrlen + 1) << 3;
-
-		if (hdrlen > len)
+	/* now skip over extension headers */
+	pb = ipv6_skip_exthdr(pb, &nexthdr, len);
+	if (!pb)
 		return;

-		/* Now this is right. */
-		pbuff += hdrlen;
-		len -= hdrlen;
-	}
-
 	hash = nexthdr & (MAX_INET_PROTOS - 1);

 	for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; 
@@ -378,7 +416,7 @@ static void icmpv6_notify(struct sk_buff *skb,
 			continue;

 		if (ipprot->err_handler) 
-			ipprot->err_handler(skb, type, code, pbuff, info,
+			ipprot->err_handler(skb, type, code, (u8*)pb, info,
 					    saddr, daddr, ipprot);
 		return;
 	}
@@ -391,7 +429,7 @@ static void icmpv6_notify(struct sk_buff *skb,
 		return;

 	while((sk = raw_v6_lookup(sk, nexthdr, daddr, saddr))) {
-		rawv6_err(sk, type, code, pbuff, saddr, daddr);
+		rawv6_err(sk, type, code, (char*)pb, saddr, daddr);
 		sk = sk->next;
 	}
 }
@@ -514,7 +552,7 @@ int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
 	return 0;
 }

-__initfunc(void icmpv6_init(struct net_proto_family *ops))
+__initfunc(int icmpv6_init(struct net_proto_family *ops))
 {
 	struct sock *sk;
 	int err;
@@ -528,11 +566,11 @@ __initfunc(void icmpv6_init(struct net_proto_family *ops))
 	icmpv6_socket->state = SS_UNCONNECTED;
 	icmpv6_socket->type=SOCK_RAW;

-	if((err=ops->create(icmpv6_socket, IPPROTO_ICMPV6))<0)
+	if((err=ops->create(icmpv6_socket, IPPROTO_ICMPV6))<0) {
 		printk(KERN_DEBUG 
 		       "Failed to create the ICMP6 control socket.\n");
-
-	MOD_DEC_USE_COUNT;
+		return 1;
+	}

 	sk = icmpv6_socket->sk;
 	sk->allocation = GFP_ATOMIC;
@@ -542,6 +580,16 @@ __initfunc(void icmpv6_init(struct net_proto_family *ops))

 	ndisc_init(ops);
 	igmp6_init(ops);
+	return 0; 
+}
+
+void icmpv6_cleanup(void)
+{
+	inet6_del_protocol(&icmpv6_protocol);
+#if 0	/* NOTE(review): compiled out, yet ipv6_cleanup() now calls us instead of ndisc_cleanup() -- confirm */
+	ndisc_cleanup();
+#endif
+	igmp6_cleanup();
+}

 static struct icmp6_err {

--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -6,7 +6,7 @@
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Ian P. Morris		<I.P.Morris@soton.ac.uk>
 *
- *	$Id: ip6_input.c,v 1.8 1998/02/12 07:43:43 davem Exp $
+ *	$Id: ip6_input.c,v 1.9 1998/04/30 16:24:24 freitag Exp $
 *
 *	Based in linux/net/ipv4/ip_input.c
 *
@@ -70,12 +70,6 @@ struct ipv6_tlvtype {
 	u8 len;
 };

-struct ipv6_destopt_hdr {
-	u8 nexthdr;
-	u8 hdrlen;
-};
-
-
 struct tlvtype_proc {
 	u8	type;
 	int	(*func) (struct sk_buff *, struct device *dev, __u8 *ptr,

--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -7,7 +7,7 @@
 *
 *	Based on linux/net/ipv4/ip_sockglue.c
 *
- *	$Id: ipv6_sockglue.c,v 1.18 1998/03/20 09:12:18 davem Exp $
+ *	$Id: ipv6_sockglue.c,v 1.19 1998/04/30 16:24:26 freitag Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
@@ -242,7 +242,7 @@ void ipv6_cleanup(void)
 	ipv6_sysctl_unregister();	
 #endif
 	ip6_route_cleanup();
-	ndisc_cleanup();
+	icmpv6_cleanup();
 	addrconf_cleanup();	
 }
 #endif

--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -5,7 +5,7 @@
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>	
 *
- *	$Id: mcast.c,v 1.14 1998/03/20 09:12:18 davem Exp $
+ *	$Id: mcast.c,v 1.15 1998/04/30 16:24:28 freitag Exp $
 *
 *	Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c 
 *
@@ -619,8 +619,6 @@ __initfunc(void igmp6_init(struct net_proto_family *ops))
 		printk(KERN_DEBUG 
 		       "Failed to create the IGMP6 control socket.\n");

-	MOD_DEC_USE_COUNT;
-
 	sk = igmp6_socket->sk;
 	sk->allocation = GFP_ATOMIC;
 	sk->num = 256;			/* Don't receive any data */
@@ -632,3 +630,9 @@ __initfunc(void igmp6_init(struct net_proto_family *ops))
 #endif
 }

+void igmp6_cleanup(void)
+{
+#ifdef CONFIG_PROC_FS	/* without procfs this function does nothing */
+	remove_proc_entry("net/igmp6", 0); 
+#endif	/* CONFIG_PROC_FS */
+}
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1152,9 +1152,6 @@ __initfunc(void ndisc_init(struct net_proto_family *ops))
 		printk(KERN_DEBUG 
 		       "Failed to create the NDISC control socket.\n");

-	/* Eeeh... What is it? --ANK */
-	MOD_DEC_USE_COUNT;
-
 	sk = ndisc_socket->sk;
 	sk->allocation = GFP_ATOMIC;
 	sk->net_pinfo.af_inet6.hop_limit = 255;

--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -5,7 +5,7 @@
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>	
 *
- *	$Id: reassembly.c,v 1.9 1998/02/12 07:43:48 davem Exp $
+ *	$Id: reassembly.c,v 1.10 1998/04/30 16:24:32 freitag Exp $
 *
 *	Based on: net/ipv4/ip_fragment.c
 *
@@ -15,6 +15,11 @@
 *      2 of the License, or (at your option) any later version.
 */

+/* 
+ *	Fixes:	
+ *	Andi Kleen	Make it work with multiple hosts.
+ *			More RFC compliance.
+ */
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -39,8 +44,9 @@

 static struct frag_queue ipv6_frag_queue = {
 	&ipv6_frag_queue, &ipv6_frag_queue,
-	0, {0}, NULL, NULL,
-	0
+	0, {{{0}}}, {{{0}}},
+	{0}, NULL, NULL,
+	0, 0, NULL
 };

 static void			create_frag_entry(struct sk_buff *skb, 
@@ -72,12 +78,11 @@ static int reasm_frag(struct frag_queue *fq, struct sk_buff **skb,
 	 *	one it's the kmalloc for a struct ipv6_frag.
 	 *	Feel free to try other alternatives...
 	 */
-	reasm_queue(fq, *skb, fhdr);
-
 	if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) {
 		fq->last_in = 1;
 		fq->nhptr = nhptr;
 	}
+	reasm_queue(fq, *skb, fhdr);

 	if (fq->last_in) {
 		if ((nh = reasm_frag_1(fq, skb)))
@@ -90,18 +95,27 @@ static int reasm_frag(struct frag_queue *fq, struct sk_buff **skb,
 	return 0;
 }

-int ipv6_reassembly(struct sk_buff **skb, struct device *dev, __u8 *nhptr,
+int ipv6_reassembly(struct sk_buff **skbp, struct device *dev, __u8 *nhptr,
 		    struct ipv6_options *opt)
 {
-	struct frag_hdr *fhdr = (struct frag_hdr *) ((*skb)->h.raw);
+	struct sk_buff *skb = *skbp; 
+	struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw);
 	struct frag_queue *fq;
+	struct ipv6hdr *hdr;

+	if ((u8 *)(fhdr+1) > skb->tail) {
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw);
+		return 0;
+	}
+	hdr = skb->nh.ipv6h;
 	for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) {
-		if (fq->id == fhdr->identification)
-			return reasm_frag(fq, skb, nhptr,fhdr);
+		if (fq->id == fhdr->identification && 
+		    !ipv6_addr_cmp(&hdr->saddr, &fq->saddr) &&
+		    !ipv6_addr_cmp(&hdr->daddr, &fq->daddr))
+			return reasm_frag(fq, skbp, nhptr,fhdr);
 	}
 	
-	create_frag_entry(*skb, dev, nhptr, fhdr);
+	create_frag_entry(skb, dev, nhptr, fhdr);

 	return 0;
 }
@@ -154,6 +168,7 @@ static void create_frag_entry(struct sk_buff *skb, struct device *dev,
 			      struct frag_hdr *fhdr)
 {
 	struct frag_queue *fq;
+	struct ipv6hdr *hdr; 

 	fq = (struct frag_queue *) kmalloc(sizeof(struct frag_queue), 
 					   GFP_ATOMIC);
@@ -167,6 +182,10 @@ static void create_frag_entry(struct sk_buff *skb, struct device *dev,

 	fq->id = fhdr->identification;

+	hdr = skb->nh.ipv6h;
+	ipv6_addr_copy(&fq->saddr, &hdr->saddr);
+	ipv6_addr_copy(&fq->daddr, &hdr->daddr);
+
 	fq->dev = dev;

 	/* init_timer has been done by the memset */
@@ -209,6 +228,11 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb,
 	nfp->len = (ntohs(skb->nh.ipv6h->payload_len) -
 		    ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));

+	if ((u32)nfp->offset + (u32)nfp->len > 65536) {
+		/* icmpv6_param_prob() frees skb; "goto err" would free it twice. */
+		kfree(nfp);
+		icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off); 
+		return;
+	}

 	nfp->skb  = skb;
 	nfp->fhdr = fhdr;
@@ -224,19 +247,37 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb,
 	}
 	
 	if (fp && fp->offset == nfp->offset) {
-		if (fp->len != nfp->len) {
-			/* this cannot happen */
+		if (nfp->len != fp->len) {
 			printk(KERN_DEBUG "reasm_queue: dup with wrong len\n");
 		}

 		/* duplicate. discard it. */
-		kfree_skb(skb);
-		kfree(nfp);
-		return;
+		goto err;
 	}
 	
 	*bptr = nfp;
 	nfp->next = fp;
+
+#ifdef STRICT_RFC
+	if (fhdr->frag_off & __constant_htons(0x0001)) {
+		/* Check if the fragment is rounded to 8 bytes.
+		 * Required by the RFC.
+		 */
+		if (nfp->len & 0x7) {
+			printk(KERN_DEBUG "fragment not rounded to 8bytes\n");
+			*bptr = fp;	/* unlink nfp again, it was queued just above */
+			kfree(nfp);	/* skb itself is freed by icmpv6_param_prob() */
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, &skb->nh.ipv6h->payload_len);
+			return;
+		}
+	}
+#endif 
+
+	return;
+
+err:
+	kfree(nfp);
+	kfree_skb(skb);
 }

 /*
@@ -303,6 +344,8 @@ static int reasm_frag_1(struct frag_queue *fq, struct sk_buff **skb_in)
 	/*
 	 *	FIXME: If we don't have a checksum we ought to be able
 	 *	to defragment and checksum in this pass. [AC]
+	 *	Note that we don't really know yet whether the protocol
+	 *	needs checksums at all. It might still be a good idea. -AK
 	 */
 	for(fp = fq->fragments; fp; ) {
 		struct ipv6_frag *back;

--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>	
 *
- *	$Id: tcp_ipv6.c,v 1.78 1998/04/16 16:29:22 freitag Exp $
+ *	$Id: tcp_ipv6.c,v 1.80 1998/05/02 12:47:15 davem Exp $
 *
 *	Based on: 
 *	linux/net/ipv4/tcp.c
@@ -123,10 +123,21 @@ static int tcp_v6_verify_bind(struct sock *sk, unsigned short snum)
 				result = 1;
 		}
 	}
-	if((result == 0) &&
-	   (tb == NULL) &&
-	   (tcp_bucket_create(snum) == NULL))
+	if(result == 0) {
+		if(tb == NULL) {
+			if(tcp_bucket_create(snum) == NULL)
 				result = 1;
+		} else {
+			/* It could be pending garbage collection, this
+			 * kills the race and prevents it from disappearing
+			 * out from under us by the time we use it.  -DaveM
+			 */
+			if(tb->owners == NULL && !(tb->flags & TCPB_FLAG_LOCKED)) {
+				tb->flags = TCPB_FLAG_LOCKED;
+				tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
+			}
+		}
+	}
 go_like_smoke:
 	SOCKHASH_UNLOCK();
 	return result;

--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -458,7 +458,7 @@ EXPORT_SYMBOL(qdisc_put_rtab);
 EXPORT_SYMBOL(qdisc_new_estimator);
 EXPORT_SYMBOL(qdisc_kill_estimator);
 #endif
-#ifdef CONFIG_NET_POLICE
+#ifdef CONFIG_NET_CLS_POLICE
 EXPORT_SYMBOL(tcf_police);
 EXPORT_SYMBOL(tcf_police_locate);
 EXPORT_SYMBOL(tcf_police_destroy);