Commit 351ae16b authored by Linus Torvalds

Import 2.2.2pre5

parent 724170c9
This source diff could not be displayed because it is too large.
......@@ -241,7 +241,7 @@ CONFIG_EEXPRESS_PRO100=y
# CONFIG_ISDN is not set
#
# CD-ROM drivers (not for SCSI or IDE/ATAPI drives)
# Old CD-ROM drivers (not SCSI, not IDE)
#
# CONFIG_CD_NO_IDESCSI is not set
......
......@@ -43,6 +43,7 @@ EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL_NOVERS(__down_failed);
EXPORT_SYMBOL_NOVERS(__down_failed_interruptible);
EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
EXPORT_SYMBOL_NOVERS(__up_wakeup);
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy);
......
......@@ -569,6 +569,9 @@ static int __init assign_irq_vector(int irq)
printk("WARNING: ASSIGN_IRQ_VECTOR wrapped back to %02X\n",
current_vector);
}
if (current_vector == SYSCALL_VECTOR)
panic("ran out of interrupt sources!");
IO_APIC_VECTOR(irq) = current_vector;
return current_vector;
}
......@@ -693,9 +696,11 @@ void __init print_IO_APIC(void)
printk(".... register #01: %08X\n", *(int *)&reg_01);
printk("....... : max redirection entries: %04X\n", reg_01.entries);
if ( (reg_01.entries != 0x0f) && /* ISA-only Neptune boards */
(reg_01.entries != 0x17) && /* ISA+PCI boards */
(reg_01.entries != 0x3F) /* Xeon boards */
if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */
(reg_01.entries != 0x17) && /* typical ISA+PCI boards */
(reg_01.entries != 0x1b) && /* Compaq Proliant boards */
(reg_01.entries != 0x1f) && /* dual Xeon boards */
(reg_01.entries != 0x3F) /* bigger Xeon boards */
)
UNEXPECTED_IO_APIC();
if (reg_01.entries == 0x0f)
......@@ -1163,7 +1168,7 @@ static inline void init_IO_APIC_traps(void)
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
for (i = 0; i < NR_IRQS ; i++) {
if (IO_APIC_IRQ(i)) {
if (IO_APIC_VECTOR(i) > 0) {
if (IO_APIC_irq_trigger(i))
irq_desc[i].handler = &ioapic_level_irq_type;
else
......@@ -1173,8 +1178,15 @@ static inline void init_IO_APIC_traps(void)
*/
if (i < 16)
disable_8259A_irq(i);
}
} else
/*
* we have no business changing low ISA
* IRQs.
*/
if (IO_APIC_IRQ(i))
irq_desc[i].handler = &no_irq_type;
}
init_IRQ_SMP();
}
/*
......@@ -1278,14 +1290,12 @@ void __init setup_IO_APIC(void)
construct_default_ISA_mptable();
}
init_IO_APIC_traps();
/*
* Set up the IO-APIC IRQ routing table by parsing the MP-BIOS
* mptable:
*/
setup_IO_APIC_irqs();
init_IRQ_SMP();
init_IO_APIC_traps();
check_timer();
print_IO_APIC();
......
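The assign_irq_vector() hunk above hands out IDT vectors for IO-APIC interrupts and, with this change, refuses to collide with the system-call vector and panics when the vector space is exhausted. Below is a minimal userspace sketch of that allocation policy; the starting vector, the step of 8 and the wrap rule are assumptions made for illustration, not copied from the kernel.

#include <stdio.h>
#include <stdlib.h>

#define FIRST_DEVICE_VECTOR 0x31     /* assumed starting point for the demo */
#define SYSCALL_VECTOR      0x80     /* int 0x80 must never be handed out   */

static int assign_vector_demo(void)
{
    static int current_vector = FIRST_DEVICE_VECTOR;

    current_vector += 8;                      /* spread vectors across priority groups */
    if (current_vector > 0xFE) {              /* wrapped past the top of the IDT */
        current_vector = FIRST_DEVICE_VECTOR + 1;
        fprintf(stderr, "warning: vector allocation wrapped back to 0x%02x\n",
                current_vector);
    }
    if (current_vector == SYSCALL_VECTOR) {   /* the new check from the hunk above */
        fprintf(stderr, "ran out of interrupt vectors\n");
        exit(1);
    }
    return current_vector;
}

int main(void)
{
    for (int irq = 0; irq < 5; irq++)
        printf("irq %d -> vector 0x%02x\n", irq, assign_vector_demo());
    return 0;
}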
......@@ -70,11 +70,34 @@ atomic_t nmi_counter;
*/
spinlock_t irq_controller_lock;
/*
* Dummy controller type for unused interrupts
*/
static void do_none(unsigned int irq, struct pt_regs * regs) { }
static void do_none(unsigned int irq, struct pt_regs * regs)
{
/*
* we are careful. While for ISA irqs it's common to happen
* outside of any driver (think autodetection), this is not
* at all nice for PCI interrupts. So we are stricter and
* print a warning when such spurious interrupts happen.
* Spurious interrupts can confuse other drivers if the PCI
* IRQ line is shared.
*
* Such spurious interrupts are either driver bugs, or
* sometimes hw (chipset) bugs.
*/
printk("unexpected IRQ vector %d on CPU#%d!\n",irq, smp_processor_id());
#ifdef __SMP__
/*
* [currently unexpected vectors happen only on SMP and APIC.
* if we want to have non-APIC and non-8259A controllers
* in the future with unexpected vectors, this ack should
* probably be made controller-specific.]
*/
ack_APIC_irq();
#endif
}
static void enable_none(unsigned int irq) { }
static void disable_none(unsigned int irq) { }
......@@ -82,7 +105,7 @@ static void disable_none(unsigned int irq) { }
#define startup_none enable_none
#define shutdown_none disable_none
static struct hw_interrupt_type no_irq_type = {
struct hw_interrupt_type no_irq_type = {
"none",
startup_none,
shutdown_none,
......@@ -141,10 +164,10 @@ static unsigned int cached_irq_mask = 0xffff;
* fed to the CPU IRQ line directly.
*
* Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
* this 'mixed mode' IRQ handling costs us one more branch in do_IRQ,
* but we have _much_ higher compatibility and robustness this way.
* this 'mixed mode' IRQ handling costs nothing because it's only used
* at IRQ setup time.
*/
unsigned long long io_apic_irqs = 0;
unsigned long io_apic_irqs = 0;
/*
* These have to be protected by the irq controller spinlock
......@@ -254,32 +277,43 @@ static void do_8259A_IRQ(unsigned int irq, struct pt_regs * regs)
BUILD_COMMON_IRQ()
#define BI(x,y) \
BUILD_IRQ(##x##y)
#define BUILD_16_IRQS(x) \
BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
BI(x,c) BI(x,d) BI(x,e) BI(x,f)
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
* (these are usually mapped to vectors 0x20-0x30)
*/
BUILD_IRQ(0) BUILD_IRQ(1) BUILD_IRQ(2) BUILD_IRQ(3)
BUILD_IRQ(4) BUILD_IRQ(5) BUILD_IRQ(6) BUILD_IRQ(7)
BUILD_IRQ(8) BUILD_IRQ(9) BUILD_IRQ(10) BUILD_IRQ(11)
BUILD_IRQ(12) BUILD_IRQ(13) BUILD_IRQ(14) BUILD_IRQ(15)
BUILD_16_IRQS(0x0)
#ifdef CONFIG_X86_IO_APIC
/*
* The IO-APIC gives us many more interrupt sources..
* The IO-APIC gives us many more interrupt sources. Most of these
* are unused but an SMP system is supposed to have enough memory ...
* sometimes (mostly wrt. hw bugs) we get corrupted vectors all
* across the spectrum, so we really want to be prepared to get all
* of these. Plus, more powerful systems might have more than 64
* IO-APIC registers.
*
* (these are usually mapped into the 0x30-0xff vector range)
*/
BUILD_IRQ(16) BUILD_IRQ(17) BUILD_IRQ(18) BUILD_IRQ(19)
BUILD_IRQ(20) BUILD_IRQ(21) BUILD_IRQ(22) BUILD_IRQ(23)
BUILD_IRQ(24) BUILD_IRQ(25) BUILD_IRQ(26) BUILD_IRQ(27)
BUILD_IRQ(28) BUILD_IRQ(29) BUILD_IRQ(30) BUILD_IRQ(31)
BUILD_IRQ(32) BUILD_IRQ(33) BUILD_IRQ(34) BUILD_IRQ(35)
BUILD_IRQ(36) BUILD_IRQ(37) BUILD_IRQ(38) BUILD_IRQ(39)
BUILD_IRQ(40) BUILD_IRQ(41) BUILD_IRQ(42) BUILD_IRQ(43)
BUILD_IRQ(44) BUILD_IRQ(45) BUILD_IRQ(46) BUILD_IRQ(47)
BUILD_IRQ(48) BUILD_IRQ(49) BUILD_IRQ(50) BUILD_IRQ(51)
BUILD_IRQ(52) BUILD_IRQ(53) BUILD_IRQ(54) BUILD_IRQ(55)
BUILD_IRQ(56) BUILD_IRQ(57) BUILD_IRQ(58) BUILD_IRQ(59)
BUILD_IRQ(60) BUILD_IRQ(61) BUILD_IRQ(62) BUILD_IRQ(63)
BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
#endif
#undef BUILD_16_IRQS
#undef BI
#ifdef __SMP__
/*
* The following vectors are part of the Linux architecture, there
......@@ -303,37 +337,35 @@ BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt)
#endif
#define IRQ(x,y) \
IRQ##x##y##_interrupt
#define IRQLIST_16(x) \
IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
static void (*interrupt[NR_IRQS])(void) = {
IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt,
IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt,
IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt,
IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt
IRQLIST_16(0x0),
#ifdef CONFIG_X86_IO_APIC
,IRQ16_interrupt, IRQ17_interrupt, IRQ18_interrupt, IRQ19_interrupt,
IRQ20_interrupt, IRQ21_interrupt, IRQ22_interrupt, IRQ23_interrupt,
IRQ24_interrupt, IRQ25_interrupt, IRQ26_interrupt, IRQ27_interrupt,
IRQ28_interrupt, IRQ29_interrupt,
IRQ30_interrupt, IRQ31_interrupt, IRQ32_interrupt, IRQ33_interrupt,
IRQ34_interrupt, IRQ35_interrupt, IRQ36_interrupt, IRQ37_interrupt,
IRQ38_interrupt, IRQ39_interrupt,
IRQ40_interrupt, IRQ41_interrupt, IRQ42_interrupt, IRQ43_interrupt,
IRQ44_interrupt, IRQ45_interrupt, IRQ46_interrupt, IRQ47_interrupt,
IRQ48_interrupt, IRQ49_interrupt,
IRQ50_interrupt, IRQ51_interrupt, IRQ52_interrupt, IRQ53_interrupt,
IRQ54_interrupt, IRQ55_interrupt, IRQ56_interrupt, IRQ57_interrupt,
IRQ58_interrupt, IRQ59_interrupt,
IRQ60_interrupt, IRQ61_interrupt, IRQ62_interrupt, IRQ63_interrupt
IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
IRQLIST_16(0xc), IRQLIST_16(0xd)
#endif
};
#undef IRQ
#undef IRQLIST_16
/*
* Initial irq handlers.
* Special irq handlers.
*/
void no_action(int cpl, void *dev_id, struct pt_regs *regs)
{
}
void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
#ifndef CONFIG_VISWS
/*
......@@ -770,7 +802,7 @@ asmlinkage void do_IRQ(struct pt_regs regs)
* 0 return value means that this irq is already being
* handled by some other CPU. (or is disabled)
*/
unsigned int irq = regs.orig_eax & 0xff;
int irq = regs.orig_eax & 0xff; /* subtle, see irq.h */
int cpu = smp_processor_id();
kstat.irqs[cpu][irq]++;
......@@ -986,42 +1018,6 @@ int probe_irq_off(unsigned long unused)
return irq_found;
}
/*
* Silly, horrible hack
*/
static char uglybuffer[10*256];
__asm__("\n" __ALIGN_STR"\n"
"common_unexpected:\n\t"
SAVE_ALL
"pushl $ret_from_intr\n\t"
"jmp strange_interrupt");
void strange_interrupt(int irqnum)
{
printk("Unexpected interrupt %d\n", irqnum & 255);
for (;;);
}
extern int common_unexpected;
__initfunc(void init_unexpected_irq(void))
{
int i;
for (i = 0; i < 256; i++) {
char *code = uglybuffer + 10*i;
unsigned long jumpto = (unsigned long) &common_unexpected;
jumpto -= (unsigned long)(code+10);
code[0] = 0x68; /* pushl */
*(int *)(code+1) = i - 512;
code[5] = 0xe9; /* jmp */
*(int *)(code+6) = jumpto;
set_intr_gate(i,code);
}
}
void init_ISA_irqs (void)
{
int i;
......@@ -1033,7 +1029,7 @@ void init_ISA_irqs (void)
if (i < 16) {
/*
* 16 old-style INTA-cycle interrupt gates:
* 16 old-style INTA-cycle interrupts:
*/
irq_desc[i].handler = &i8259A_irq_type;
} else {
......@@ -1054,9 +1050,16 @@ __initfunc(void init_IRQ(void))
#else
init_VISWS_APIC_irqs();
#endif
for (i = 0; i < 16; i++)
set_intr_gate(0x20+i,interrupt[i]);
/*
* Cover the whole vector space, no vector can escape
* us. (some of these will be overridden and become
* 'special' SMP interrupts)
*/
for (i = 0; i < NR_IRQS; i++) {
int vector = FIRST_EXTERNAL_VECTOR + i;
if (vector != SYSCALL_VECTOR)
set_intr_gate(vector, interrupt[i]);
}
#ifdef __SMP__
......@@ -1067,13 +1070,9 @@ __initfunc(void init_IRQ(void))
set_intr_gate(IRQ0_TRAP_VECTOR, interrupt[0]);
/*
* The reschedule interrupt slowly changes it's functionality,
* while so far it was a kind of broadcasted timer interrupt,
* in the future it should become a CPU-to-CPU rescheduling IPI,
* driven by schedule() ?
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
* IPI, driven by wakeup.
*/
/* IPI for rescheduling */
set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPI for invalidation */
......
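The irq.c hunks above replace 64 hand-written BUILD_IRQ() lines with the BUILD_16_IRQS()/IRQLIST_16() macros, which stamp out one low-level stub per vector in rows of 16 using token pasting. Here is a stand-alone sketch of the same preprocessor trick; the demo_irq* names and the printf bodies are invented for the example and are not the kernel's BUILD_IRQ expansion.

#include <stdio.h>

#define BI(x,y) \
    static void demo_irq##x##y##_handler(void) \
    { printf("handler for vector 0x" #x #y "\n"); }

#define BUILD_16_IRQS(x) \
    BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
    BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
    BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
    BI(x,c) BI(x,d) BI(x,e) BI(x,f)

BUILD_16_IRQS(0)    /* defines demo_irq00_handler() .. demo_irq0f_handler() */

int main(void)
{
    demo_irq00_handler();
    demo_irq0f_handler();
    return 0;
}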
......@@ -16,6 +16,7 @@ struct hw_interrupt_type {
void (*disable)(unsigned int irq);
};
extern struct hw_interrupt_type no_irq_type;
/*
* IRQ line status.
......@@ -40,6 +41,18 @@ typedef struct {
unsigned int depth; /* Disable depth for nested irq disables */
} irq_desc_t;
/*
* IDT vectors usable for external interrupt sources start
* at 0x20:
*/
#define FIRST_EXTERNAL_VECTOR 0x20
#define SYSCALL_VECTOR 0x80
/*
* Vectors 0x20-0x2f are used for ISA interrupts.
*/
/*
* Special IRQ vectors used by the SMP architecture:
*
......@@ -54,7 +67,7 @@ typedef struct {
#define MTRR_CHANGE_VECTOR 0x50
/*
* First vector available to drivers: (vectors 0x51-0xfe)
* First APIC vector available to drivers: (vectors 0x51-0xfe)
*/
#define IRQ0_TRAP_VECTOR 0x51
......@@ -94,7 +107,9 @@ extern void send_IPI(int dest, int vector);
extern void init_pic_mode(void);
extern void print_IO_APIC(void);
extern unsigned long long io_apic_irqs;
extern unsigned long io_apic_irqs;
extern char _stext, _etext;
#define MAX_IRQ_SOURCES 128
#define MAX_MP_BUSSES 32
......@@ -126,7 +141,7 @@ static inline void irq_exit(int cpu, unsigned int irq)
hardirq_exit(cpu);
}
#define IO_APIC_IRQ(x) ((1<<x) & io_apic_irqs)
#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
#else
......@@ -201,6 +216,13 @@ __asm__( \
"pushl $ret_from_intr\n\t" \
"jmp "SYMBOL_NAME_STR(do_IRQ));
/*
* subtle. orig_eax is used by the signal code to distinct between
* system calls and interrupted 'random user-space'. Thus we have
* to put a negative value into orig_eax here. (the problem is that
* both system calls and IRQs want to have small integer numbers in
* orig_eax, and the syscall code has won the optimization conflict ;)
*/
#define BUILD_IRQ(nr) \
asmlinkage void IRQ_NAME(nr); \
__asm__( \
......@@ -216,7 +238,6 @@ SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
static inline void x86_do_profile (unsigned long eip)
{
if (prof_buffer && current->pid) {
extern int _stext;
eip -= (unsigned long) &_stext;
eip >>= prof_shift;
/*
......
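The new IO_APIC_IRQ() definition above treats every IRQ at or above 16 as IO-APIC routed and consults the io_apic_irqs bitmask only for the 16 legacy ISA lines. A small userspace check of that expression; the mask value below is made up for the demo.

#include <stdio.h>

static unsigned long io_apic_irqs = 0xfffa;   /* assumed mask: ISA IRQs except 0 and 2 */

#define IO_APIC_IRQ(x) (((x) >= 16) || ((1 << (x)) & io_apic_irqs))

int main(void)
{
    int probes[] = { 0, 2, 5, 13, 16, 19, 200 };
    int n = sizeof(probes) / sizeof(probes[0]);

    for (int i = 0; i < n; i++)
        printf("IRQ %3d routed through the IO-APIC: %s\n",
               probes[i], IO_APIC_IRQ(probes[i]) ? "yes" : "no");
    return 0;
}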
......@@ -807,7 +807,7 @@ int get_cpuinfo(char * buffer)
c->x86_model,
c->x86_model_id[0] ? c->x86_model_id : "unknown");
if (c->x86_mask)
if (c->x86_mask || c->cpuid_level >= 0)
p += sprintf(p, "stepping\t: %d\n", c->x86_mask);
else
p += sprintf(p, "stepping\t: unknown\n");
......
......@@ -42,7 +42,7 @@
#include "irq.h"
extern unsigned long start_kernel, _etext;
extern unsigned long start_kernel;
extern void update_one_process( struct task_struct *p,
unsigned long ticks, unsigned long user,
unsigned long system, int cpu);
......@@ -319,8 +319,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
printk("Processor #%d unused. (Max %d processors).\n",m->mpc_apicid, NR_CPUS);
else
{
int ver = m->mpc_apicver;
cpu_present_map|=(1<<m->mpc_apicid);
apic_version[m->mpc_apicid]=m->mpc_apicver;
/*
* Validate version
*/
if (ver == 0x0) {
printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
ver = 0x10;
}
apic_version[m->mpc_apicid] = ver;
}
}
mpt+=sizeof(*m);
......@@ -1806,8 +1815,10 @@ asmlinkage void smp_mtrr_interrupt(void)
*/
asmlinkage void smp_spurious_interrupt(void)
{
/* ack_APIC_irq(); see sw-dev-man vol 3, chapter 7.4.13.5 */
printk("spurious APIC interrupt, ayiee, should never happen.\n");
ack_APIC_irq();
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
smp_processor_id());
}
/*
......@@ -2058,3 +2069,4 @@ int setup_profiling_timer(unsigned int multiplier)
}
#undef APIC_DIVISOR
......@@ -42,6 +42,8 @@
#include <asm/lithium.h>
#endif
#include "irq.h"
asmlinkage int system_call(void);
asmlinkage void lcall7(void);
......@@ -125,7 +127,6 @@ static void show_registers(struct pt_regs *regs)
unsigned long esp;
unsigned short ss;
unsigned long *stack, addr, module_start, module_end;
extern char _stext, _etext;
esp = (unsigned long) (1+regs);
ss = __KERNEL_DS;
......@@ -669,9 +670,6 @@ cobalt_init(void)
#endif
void __init trap_init(void)
{
/* Initially up all of the IDT to jump to unexpected */
init_unexpected_irq();
if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
EISA_bus = 1;
set_call_gate(&default_ldt,lcall7);
......@@ -693,7 +691,7 @@ void __init trap_init(void)
set_trap_gate(15,&spurious_interrupt_bug);
set_trap_gate(16,&coprocessor_error);
set_trap_gate(17,&alignment_check);
set_system_gate(0x80,&system_call);
set_system_gate(SYSCALL_VECTOR,&system_call);
/* set up GDT task & ldt entries */
set_tss_desc(0, &init_task.tss);
......
......@@ -31,6 +31,15 @@ ENTRY(__down_failed_interruptible)
popl %edx /* restore %edx */
ret
/* Don't save/restore %eax, because that will be our return value */
ENTRY(__down_failed_trylock)
pushl %edx /* save %edx */
pushl %ecx /* save %ecx (and argument) */
call SYMBOL_NAME(__down_trylock)
popl %ecx /* restore %ecx (count on __down_trylock not changing it) */
popl %edx /* restore %edx */
ret
ENTRY(__up_wakeup)
pushl %eax /* save %eax */
pushl %edx /* save %edx */
......
......@@ -120,6 +120,7 @@ static struct bttv bttvs[BTTV_MAX];
#define I2C_TIMING (0x7<<4)
#define I2C_DELAY 10
#define I2C_SET(CTRL,DATA) \
{ btwrite((CTRL<<1)|(DATA), BT848_I2C); udelay(I2C_DELAY); }
#define I2C_GET() (btread(BT848_I2C)&1)
......@@ -244,6 +245,7 @@ static void i2c_setlines(struct i2c_bus *bus,int ctrl,int data)
{
struct bttv *btv = (struct bttv*)bus->data;
btwrite((ctrl<<1)|data, BT848_I2C);
btread(BT848_I2C); /* flush buffers */
udelay(I2C_DELAY);
}
......
......@@ -774,7 +774,6 @@ static int msp3410d_thread(void *data)
goto done;
dprintk("msp3410: thread: sleep\n");
down_interruptible(&sem);
sem.owner = 0;
dprintk("msp3410: thread: wakeup\n");
if (msp->rmmod)
goto done;
......
......@@ -731,9 +731,6 @@ int idescsi_queue (Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
printk (KERN_ERR "ide-scsi: drive id %d not present\n", cmd->target);
goto abort;
}
if (cmd->lun != 0) { /* Only respond to LUN 0. Drop others */
goto abort;
}
scsi = drive->driver_data;
pc = kmalloc (sizeof (idescsi_pc_t), GFP_ATOMIC);
rq = kmalloc (sizeof (struct request), GFP_ATOMIC);
......
......@@ -1972,7 +1972,6 @@ scsi_error_handler(void * data)
*/
SCSI_LOG_ERROR_RECOVERY(1,printk("Error handler sleeping\n"));
down_interruptible (&sem);
sem.owner = 0;
if (signal_pending(current) )
break;
......
......@@ -385,7 +385,9 @@ asmlinkage int sys_fdatasync(unsigned int fd)
goto out_putf;
/* this needs further work, at the moment it is identical to fsync() */
down(&inode->i_sem);
err = file->f_op->fsync(file, dentry);
up(&inode->i_sem);
out_putf:
fput(file);
......@@ -812,8 +814,8 @@ void refile_buffer(struct buffer_head * buf)
* If too high a percentage of the buffers are dirty...
*/
if (nr_buffers_type[BUF_DIRTY] > too_many ||
(size_buffers_type[BUF_DIRTY] + size_buffers_type[BUF_LOCKED])/PAGE_SIZE > too_large) {
if (nr_buffers_type[BUF_LOCKED] > 2 * bdf_prm.b_un.ndirty)
size_buffers_type[BUF_DIRTY]/PAGE_SIZE > too_large) {
if (nr_buffers_type[BUF_LOCKED] > 3 * bdf_prm.b_un.ndirty)
wakeup_bdflush(1);
else
wakeup_bdflush(0);
......@@ -1767,7 +1769,7 @@ int bdflush(void * unused)
#ifdef DEBUG
for(nlist = 0; nlist < NR_LIST; nlist++)
#else
for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
#endif
{
ndirty = 0;
......@@ -1786,11 +1788,16 @@ int bdflush(void * unused)
}
/* Clean buffer on dirty list? Refile it */
if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
{
refile_buffer(bh);
continue;
}
if (nlist == BUF_DIRTY && !buffer_dirty(bh)) {
refile_buffer(bh);
continue;
}
/* Unlocked buffer on locked list? Refile it */
if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
refile_buffer(bh);
continue;
}
if (buffer_locked(bh) || !buffer_dirty(bh))
continue;
......
1999-01-30 a sun <asun@hecate.darksunrising.blah>
* catalog.c (hfs_cat_move): fixed corruption problem with
renames.
1999-01-27 a sun <asun@hecate.darksunrising.blah>
* file_hdr.c (get/set_dates): got rid of broken afpd times. NOTE:
you must use netatalk-1.4b2+asun2.1.2 or newer for this.
1998-12-20 a sun <asun@hecate.darksunrising.blah>
* bdelete.c (del_root): assign bthLNode and bthFNode only if the
......
......@@ -1348,7 +1348,7 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
hfs_sleep_on(&mdb->rename_wait);
}
spin_lock(&entry_lock);
mdb->rename_lock = 1;
mdb->rename_lock = 1; /* XXX: should be atomic_inc */
spin_unlock(&entry_lock);
/* keep readers from getting confused by changing dir size */
......@@ -1385,7 +1385,6 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
restart:
/* see if the destination exists, getting it if it does */
dest = hfs_cat_get(mdb, new_key);
if (!dest) {
/* destination doesn't exist, so create it */
struct hfs_cat_rec new_record;
......@@ -1408,14 +1407,16 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
goto bail3;
}
/* build the new record */
/* build the new record. make sure to zero out the
record. */
memset(&new_record, 0, sizeof(new_record));
new_record.cdrType = entry->type;
__write_entry(entry, &new_record);
/* insert the new record */
error = hfs_binsert(mdb->cat_tree, HFS_BKEY(new_key),
&new_record, is_dir ? 2 + sizeof(DIR_REC) :
2 + sizeof(FIL_REC));
2 + sizeof(FIL_REC));
if (error == -EEXIST) {
delete_entry(dest);
unlock_entry(dest);
......@@ -1565,7 +1566,7 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
}
end_write(new_dir);
spin_lock(&entry_lock);
mdb->rename_lock = 0;
mdb->rename_lock = 0; /* XXX: should use atomic_dec */
hfs_wake_up(&mdb->rename_wait);
spin_unlock(&entry_lock);
......
......@@ -303,16 +303,9 @@ static inline void adjust_forks(struct hfs_cat_entry *entry,
static void get_dates(const struct hfs_cat_entry *entry,
const struct inode *inode, hfs_u32 dates[3])
{
if (HFS_SB(inode->i_sb)->s_afpd) {
/* AFPD compatible: use un*x times */
dates[0] = htonl(hfs_m_to_utime(entry->create_date));
dates[1] = htonl(hfs_m_to_utime(entry->modify_date));
dates[2] = htonl(hfs_m_to_utime(entry->backup_date));
} else {
dates[0] = hfs_m_to_htime(entry->create_date);
dates[1] = hfs_m_to_htime(entry->modify_date);
dates[2] = hfs_m_to_htime(entry->backup_date);
}
dates[0] = hfs_m_to_htime(entry->create_date);
dates[1] = hfs_m_to_htime(entry->modify_date);
dates[2] = hfs_m_to_htime(entry->backup_date);
}
/*
......@@ -322,43 +315,23 @@ static void set_dates(struct hfs_cat_entry *entry, struct inode *inode,
const hfs_u32 *dates)
{
hfs_u32 tmp;
if (HFS_SB(inode->i_sb)->s_afpd) {
/* AFPD compatible: use un*x times */
tmp = hfs_u_to_mtime(ntohl(dates[0]));
if (entry->create_date != tmp) {
entry->create_date = tmp;
hfs_cat_mark_dirty(entry);
}
tmp = hfs_u_to_mtime(ntohl(dates[1]));
if (entry->modify_date != tmp) {
entry->modify_date = tmp;
inode->i_ctime = inode->i_atime = inode->i_mtime =
ntohl(dates[1]);
hfs_cat_mark_dirty(entry);
}
tmp = hfs_u_to_mtime(ntohl(dates[2]));
if (entry->backup_date != tmp) {
entry->backup_date = tmp;
hfs_cat_mark_dirty(entry);
}
} else {
tmp = hfs_h_to_mtime(dates[0]);
if (entry->create_date != tmp) {
entry->create_date = tmp;
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[1]);
if (entry->modify_date != tmp) {
entry->modify_date = tmp;
inode->i_ctime = inode->i_atime = inode->i_mtime =
hfs_h_to_utime(dates[1]);
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[2]);
if (entry->backup_date != tmp) {
entry->backup_date = tmp;
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[0]);
if (entry->create_date != tmp) {
entry->create_date = tmp;
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[1]);
if (entry->modify_date != tmp) {
entry->modify_date = tmp;
inode->i_ctime = inode->i_atime = inode->i_mtime =
hfs_h_to_utime(dates[1]);
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[2]);
if (entry->backup_date != tmp) {
entry->backup_date = tmp;
hfs_cat_mark_dirty(entry);
}
}
......
......@@ -478,7 +478,7 @@ nlmclnt_unlock_callback(struct rpc_task *task)
int status = req->a_res.status;
if (RPC_ASSASSINATED(task))
return;
goto die;
if (task->tk_status < 0) {
dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status);
......@@ -490,6 +490,9 @@ nlmclnt_unlock_callback(struct rpc_task *task)
&& status != NLM_LCK_DENIED_GRACE_PERIOD) {
printk("lockd: unexpected unlock status: %d\n", status);
}
die:
rpc_release_task(task);
}
/*
......@@ -565,6 +568,7 @@ nlmclnt_cancel_callback(struct rpc_task *task)
}
die:
rpc_release_task(task);
nlm_release_host(req->a_host);
kfree(req);
return;
......
......@@ -561,6 +561,7 @@ nlmsvc_grant_callback(struct rpc_task *task)
block->b_incall = 0;
nlm_release_host(call->a_host);
rpc_release_task(task);
}
/*
......
......@@ -492,6 +492,7 @@ nlmsvc_callback_exit(struct rpc_task *task)
task->tk_pid, -task->tk_status);
}
nlm_release_host(call->a_host);
rpc_release_task(task);
kfree(call);
}
......
......@@ -734,7 +734,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
* directories via NFS.
*/
err = 0;
if ((iap->ia_valid &= (ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
err = nfsd_setattr(rqstp, resfhp, iap);
out:
return err;
......
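The one-character nfsd_create() fix above turns "iap->ia_valid &= (ATTR_UID|ATTR_GID|ATTR_MODE)" into "&= ~(ATTR_UID|ATTR_GID|ATTR_MODE)": instead of keeping only the ownership and mode bits, it now clears them and leaves the remaining attributes (size, times, ...) for the follow-up nfsd_setattr() call. A toy illustration of the two masks; the ATTR_* values mirror the usual kernel definitions but are only for this demo.

#include <stdio.h>

#define ATTR_MODE  0x001
#define ATTR_UID   0x002
#define ATTR_GID   0x004
#define ATTR_SIZE  0x008
#define ATTR_MTIME 0x020

int main(void)
{
    unsigned int ia_valid = ATTR_MODE | ATTR_SIZE | ATTR_MTIME;

    unsigned int old_mask = ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE);
    unsigned int new_mask = ia_valid & ~(ATTR_UID | ATTR_GID | ATTR_MODE);

    printf("old test keeps 0x%03x (only owner/mode bits, the wrong set)\n", old_mask);
    printf("new test keeps 0x%03x (size/time bits that still need setattr)\n", new_mask);
    return 0;
}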
......@@ -144,7 +144,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, count, offset, wsize);
result = smb_proc_write(dentry, offset, wsize, buffer);
if (result < 0)
goto io_error;
break;
/* N.B. what if result < wsize?? */
#ifdef SMBFS_PARANOIA
if (result < wsize)
......@@ -162,15 +162,7 @@ printk("smb_writepage_sync: short write, wsize=%d, result=%d\n", wsize, result);
inode->i_size = offset;
inode->u.smbfs_i.cache_valid |= SMB_F_LOCALWRITE;
} while (count);
out:
smb_unlock_page(page);
return written ? written : result;
io_error:
/* Must mark the page invalid after I/O error */
clear_bit(PG_uptodate, &page->flags);
goto out;
}
/*
......@@ -190,6 +182,7 @@ smb_writepage(struct file *file, struct page *page)
set_bit(PG_locked, &page->flags);
atomic_inc(&page->count);
result = smb_writepage_sync(dentry, page, 0, PAGE_SIZE);
smb_unlock_page(page);
free_page(page_address(page));
return result;
}
......
......@@ -13,11 +13,15 @@
#define TIMER_IRQ 0
/*
* 16 XT IRQ's, 8 potential APIC interrupt sources.
* Right now the APIC is only used for SMP, but this
* may change.
* 16 8259A IRQ's, 240 potential APIC interrupt sources.
* Right now the APIC is mostly only used for SMP.
* 256 vectors is an architectural limit. (we can have
* more than 256 devices theoretically, but they will
* have to use shared interrupts)
* Since vectors 0x00-0x1f are used/reserved for the CPU,
* the usable vector space is 0x20-0xff (224 vectors)
*/
#define NR_IRQS 64
#define NR_IRQS 224
static __inline__ int irq_cannonicalize(int irq)
{
......
#ifndef _I386_SEMAPHORE_HELPER_H
#define _I386_SEMAPHORE_HELPER_H
/*
* SMP- and interrupt-safe semaphores helper functions.
*
* (C) Copyright 1996 Linus Torvalds
* (C) Copyright 1999 Andrea Arcangeli
*/
/*
* These two _must_ execute atomically wrt each other.
*
* This is trivially done with load_locked/store_cond,
* but on the x86 we need an external synchronizer.
*/
static inline void wake_one_more(struct semaphore * sem)
{
unsigned long flags;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (atomic_read(&sem->count) <= 0)
sem->waking++;
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
}
static inline int waking_non_zero(struct semaphore *sem)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking > 0) {
sem->waking--;
ret = 1;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
/*
* waking_non_zero_interruptible:
* 1 got the lock
* 0 go to sleep
* -EINTR interrupted
*
* We must undo the sem->count down_interruptible() increment while we are
* protected by the spinlock in order to make atomic this atomic_inc() with the
* atomic_read() in wake_one_more(), otherwise we can race. -arca
*/
static inline int waking_non_zero_interruptible(struct semaphore *sem,
struct task_struct *tsk)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking > 0) {
sem->waking--;
ret = 1;
} else if (signal_pending(tsk)) {
atomic_inc(&sem->count);
ret = -EINTR;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
/*
* waking_non_zero_trylock:
* 1 failed to lock
* 0 got the lock
*
* We must undo the sem->count down_trylock() increment while we are
* protected by the spinlock in order to make atomic this atomic_inc() with the
* atomic_read() in wake_one_more(), otherwise we can race. -arca
*/
static inline int waking_non_zero_trylock(struct semaphore *sem)
{
unsigned long flags;
int ret = 1;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking <= 0)
atomic_inc(&sem->count);
else {
sem->waking--;
ret = 0;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
#endif
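The semaphore-helper.h functions above implement a "waking token" protocol: down()/down_trylock() decrement count optimistically in assembly, up() grants a wakeup token through wake_one_more() only when sleepers exist, and the slow paths consume that token under semaphore_wake_lock; the trylock variant hands its count back on failure instead of sleeping. Below is a single-threaded userspace model of that bookkeeping only, with no spinlock or atomics, so it illustrates the accounting rather than the SMP safety.

#include <stdio.h>

struct demo_sem { int count; int waking; };

static void demo_up(struct demo_sem *s)
{
    s->count++;
    if (s->count <= 0)            /* like wake_one_more(): somebody is sleeping */
        s->waking++;
}

static int demo_down_trylock(struct demo_sem *s)   /* 0 = acquired, 1 = busy */
{
    s->count--;                   /* the optimistic decrement done in asm */
    if (s->count >= 0)
        return 0;
    if (s->waking > 0) {          /* like waking_non_zero_trylock() */
        s->waking--;
        return 0;
    }
    s->count++;                   /* failed: give the count back */
    return 1;
}

int main(void)
{
    struct demo_sem s = { 1, 0 };

    printf("first  trylock: %s\n", demo_down_trylock(&s) ? "busy" : "acquired");
    printf("second trylock: %s\n", demo_down_trylock(&s) ? "busy" : "acquired");
    demo_up(&s);
    printf("after up      : %s\n", demo_down_trylock(&s) ? "busy" : "acquired");
    return 0;
}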
......@@ -12,6 +12,11 @@
* the original code and to make semaphore waits
* interruptible so that processes waiting on
* semaphores can be killed.
* Modified 1999-02-14 by Andrea Arcangeli, split the sched.c helper
* functions in asm/semaphore-helper.h while fixing a
* potential and subtle race discovered by Ulrich Schmid
* in down_interruptible(). Since I started to play here I
* also implemented the `trylock' semaphore operation.
*
* If you would like to see an analysis of this implementation, please
* ftp to gcom.com and download the file
......@@ -23,131 +28,29 @@
#include <asm/atomic.h>
#include <asm/spinlock.h>
/*
* Semaphores are recursive: we allow the holder process
* to recursively do down() operations on a semaphore that
* the process already owns. In order to do that, we need
* to keep a semaphore-local copy of the owner and the
* "depth of ownership".
*
* NOTE! Nasty memory ordering rules:
* - "owner" and "owner_count" may only be modified once you hold the
* lock.
* - "owner_count" must be written _after_ modifying owner, and
* must be read _before_ reading owner. There must be appropriate
* write and read barriers to enforce this.
*
* On an x86, writes are always ordered, so the only enformcement
* necessary is to make sure that the owner_depth is written after
* the owner value in program order.
*
* For read ordering guarantees, the semaphore wake_lock spinlock
* is already giving us ordering guarantees.
*
* Other (saner) architectures would use "wmb()" and "rmb()" to
* do this in a more obvious manner.
*/
struct semaphore {
atomic_t count;
unsigned long owner, owner_depth;
int waking;
struct wait_queue * wait;
};
/*
* Because we want the non-contention case to be
* fast, we save the stack pointer into the "owner"
* field, and to get the true task pointer we have
* to do the bit masking. That moves the masking
* operation into the slow path.
*/
#define semaphore_owner(sem) \
((struct task_struct *)((2*PAGE_MASK) & (sem)->owner))
#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, 0, 0, NULL })
#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, 1, 0, NULL })
#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, NULL })
#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, NULL })
asmlinkage void __down_failed(void /* special register calling convention */);
asmlinkage int __down_failed_interruptible(void /* params in registers */);
asmlinkage int __down_failed_trylock(void /* params in registers */);
asmlinkage void __up_wakeup(void /* special register calling convention */);
asmlinkage void __down(struct semaphore * sem);
asmlinkage int __down_interruptible(struct semaphore * sem);
asmlinkage int __down_trylock(struct semaphore * sem);
asmlinkage void __up(struct semaphore * sem);
extern spinlock_t semaphore_wake_lock;
#define sema_init(sem, val) atomic_set(&((sem)->count), (val))
/*
* These two _must_ execute atomically wrt each other.
*
* This is trivially done with load_locked/store_cond,
* but on the x86 we need an external synchronizer.
*/
static inline void wake_one_more(struct semaphore * sem)
{
unsigned long flags;
spin_lock_irqsave(&semaphore_wake_lock, flags);
sem->waking++;
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
}
/*
* NOTE NOTE NOTE!
*
* We read owner-count _before_ getting the semaphore. This
* is important, because the semaphore also acts as a memory
* ordering point between reading owner_depth and reading
* the owner.
*
* Why is this necessary? The "owner_depth" essentially protects
* us from using stale owner information - in the case that this
* process was the previous owner but somebody else is racing to
* aquire the semaphore, the only way we can see ourselves as an
* owner is with "owner_depth" of zero (so that we know to avoid
* the stale value).
*
* In the non-race case (where we really _are_ the owner), there
* is not going to be any question about what owner_depth is.
*
* In the race case, the race winner will not even get here, because
* it will have successfully gotten the semaphore with the locked
* decrement operation.
*
* Basically, we have two values, and we cannot guarantee that either
* is really up-to-date until we have aquired the semaphore. But we
* _can_ depend on a ordering between the two values, so we can use
* one of them to determine whether we can trust the other:
*
* Cases:
* - owner_depth == zero: ignore the semaphore owner, because it
* cannot possibly be us. Somebody else may be in the process
* of modifying it and the zero may be "stale", but it sure isn't
* going to say that "we" are the owner anyway, so who cares?
* - owner_depth is non-zero. That means that even if somebody
* else wrote the non-zero count value, the write ordering requriement
* means that they will have written themselves as the owner, so
* if we now see ourselves as an owner we can trust it to be true.
*/
static inline int waking_non_zero(struct semaphore *sem, struct task_struct *tsk)
{
unsigned long flags;
unsigned long owner_depth = sem->owner_depth;
int ret = 0;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking > 0 || (owner_depth && semaphore_owner(sem) == tsk)) {
sem->owner = (unsigned long) tsk;
sem->owner_depth++; /* Don't use the possibly stale value */
sem->waking--;
ret = 1;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
/*
* This is ugly, but we want the default case to fall through.
* "down_failed" is a special asm handler that calls the C
......@@ -161,9 +64,7 @@ extern inline void down(struct semaphore * sem)
"lock ; "
#endif
"decl 0(%0)\n\t"
"js 2f\n\t"
"movl %%esp,4(%0)\n"
"movl $1,8(%0)\n\t"
"js 2f\n"
"1:\n"
".section .text.lock,\"ax\"\n"
"2:\tpushl $1b\n\t"
......@@ -185,8 +86,6 @@ extern inline int down_interruptible(struct semaphore * sem)
#endif
"decl 0(%1)\n\t"
"js 2f\n\t"
"movl %%esp,4(%1)\n\t"
"movl $1,8(%1)\n\t"
"xorl %0,%0\n"
"1:\n"
".section .text.lock,\"ax\"\n"
......@@ -199,6 +98,28 @@ extern inline int down_interruptible(struct semaphore * sem)
return result;
}
extern inline int down_trylock(struct semaphore * sem)
{
int result;
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
#ifdef __SMP__
"lock ; "
#endif
"decl 0(%1)\n\t"
"js 2f\n\t"
"xorl %0,%0\n"
"1:\n"
".section .text.lock,\"ax\"\n"
"2:\tpushl $1b\n\t"
"jmp __down_failed_trylock\n"
".previous"
:"=a" (result)
:"c" (sem)
:"memory");
return result;
}
/*
* Note! This is subtle. We jump to wake people up only if
......@@ -210,7 +131,6 @@ extern inline void up(struct semaphore * sem)
{
__asm__ __volatile__(
"# atomic up operation\n\t"
"decl 8(%0)\n\t"
#ifdef __SMP__
"lock ; "
#endif
......
......@@ -453,8 +453,7 @@ struct sock {
#ifdef CONFIG_FILTER
/* Socket Filtering Instructions */
int filter;
struct sock_filter *filter_data;
struct sk_filter *filter;
#endif /* CONFIG_FILTER */
/* This is where all the private (optional) areas that don't
......@@ -790,11 +789,11 @@ extern void sklist_destroy_socket(struct sock **list, struct sock *sk);
* sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data.
*/
extern __inline__ int sk_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
extern __inline__ int sk_filter(struct sk_buff *skb, struct sk_filter *filter)
{
int pkt_len;
pkt_len = sk_run_filter(skb->data, skb->len, filter, flen);
pkt_len = sk_run_filter(skb, filter->insns, filter->len);
if(!pkt_len)
return 1; /* Toss Packet */
else
......@@ -802,6 +801,23 @@ extern __inline__ int sk_filter(struct sk_buff *skb, struct sock_filter *filter,
return 0;
}
extern __inline__ void sk_filter_release(struct sock *sk, struct sk_filter *fp)
{
unsigned int size = sk_filter_len(fp);
atomic_sub(size, &sk->omem_alloc);
if (atomic_dec_and_test(&fp->refcnt))
kfree_s(fp, size);
}
extern __inline__ void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
atomic_inc(&fp->refcnt);
atomic_add(sk_filter_len(fp), &sk->omem_alloc);
}
#endif /* CONFIG_FILTER */
/*
......@@ -837,11 +853,8 @@ extern __inline__ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return -ENOMEM;
#ifdef CONFIG_FILTER
if (sk->filter)
{
if (sk_filter(skb, sk->filter_data, sk->filter))
return -EPERM; /* Toss packet */
}
if (sk->filter && sk_filter(skb, sk->filter))
return -EPERM; /* Toss packet */
#endif /* CONFIG_FILTER */
skb_set_owner_r(skb, sk);
......
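sk_filter_charge()/sk_filter_release() above make the attached BPF program a reference-counted object charged to the socket's option memory, so a listening socket and the children cloned from it (see the tcp_create_openreq_child() hunk further down) can share one filter and free it exactly once. A hedged userspace model of that lifetime; demo_filter, malloc and free stand in for the sock_kmalloc/omem_alloc bookkeeping.

#include <stdio.h>
#include <stdlib.h>

struct demo_filter { int refcnt; int len; };

static struct demo_filter *filter_new(int len)
{
    struct demo_filter *fp = malloc(sizeof(*fp));
    if (!fp)
        abort();
    fp->refcnt = 1;               /* like atomic_set(&fp->refcnt, 1) on attach */
    fp->len = len;
    return fp;
}

static void filter_charge(struct demo_filter *fp)   /* a new socket shares it */
{
    fp->refcnt++;
}

static void filter_release(struct demo_filter *fp)  /* a socket drops it */
{
    if (--fp->refcnt == 0) {
        printf("last reference gone, freeing %d-instruction filter\n", fp->len);
        free(fp);
    }
}

int main(void)
{
    struct demo_filter *fp = filter_new(4);
    filter_charge(fp);     /* accept(): child socket inherits the filter */
    filter_release(fp);    /* parent socket closed */
    filter_release(fp);    /* child socket closed: filter finally freed */
    return 0;
}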
......@@ -912,7 +912,7 @@ static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *
* can generate.
*/
extern __inline__ void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
int offer_wscale, int wscale, __u32 tstamp)
int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
{
/* We always get an MSS option.
* The option bytes which will be seen in normal data
......@@ -936,7 +936,7 @@ extern __inline__ void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sa
*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
*ptr++ = htonl(tstamp); /* TSVAL */
*ptr++ = __constant_htonl(0); /* TSECR */
*ptr++ = htonl(ts_recent); /* TSECR */
} else if(sack)
*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
......
......@@ -61,7 +61,7 @@ extern int request_dma(unsigned int dmanr, char * deviceID);
extern void free_dma(unsigned int dmanr);
extern spinlock_t dma_spin_lock;
#ifdef MODVERSIONS
#ifdef CONFIG_MODVERSIONS
const struct module_symbol __export_Using_Versions
__attribute__((section("__ksymtab"))) = {
1 /* Version version */, "Using_Versions"
......@@ -322,6 +322,8 @@ EXPORT_SYMBOL(printk);
EXPORT_SYMBOL(sprintf);
EXPORT_SYMBOL(vsprintf);
EXPORT_SYMBOL(kdevname);
EXPORT_SYMBOL(bdevname);
EXPORT_SYMBOL(cdevname);
EXPORT_SYMBOL(simple_strtoul);
EXPORT_SYMBOL(system_utsname); /* UTS data */
EXPORT_SYMBOL(uts_sem); /* UTS semaphore */
......@@ -370,6 +372,7 @@ EXPORT_SYMBOL(is_bad_inode);
EXPORT_SYMBOL(event);
EXPORT_SYMBOL(__down);
EXPORT_SYMBOL(__down_interruptible);
EXPORT_SYMBOL(__down_trylock);
EXPORT_SYMBOL(__up);
EXPORT_SYMBOL(brw_page);
......
......@@ -36,6 +36,7 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/semaphore-helper.h>
#include <linux/timex.h>
......@@ -863,30 +864,28 @@ void __up(struct semaphore *sem)
struct task_struct *tsk = current; \
struct wait_queue wait = { tsk, NULL };
#define DOWN_HEAD(task_state) \
\
\
tsk->state = (task_state); \
add_wait_queue(&sem->wait, &wait); \
\
/* \
* Ok, we're set up. sem->count is known to be less than zero \
* so we must wait. \
* \
* We can let go the lock for purposes of waiting. \
* We re-acquire it after awaking so as to protect \
* all semaphore operations. \
* \
* If "up()" is called before we call waking_non_zero() then \
* we will catch it right away. If it is called later then \
* we will have to go through a wakeup cycle to catch it. \
* \
* Multiple waiters contend for the semaphore lock to see \
* who gets to gate through and who has to wait some more. \
*/ \
for (;;) { \
if (waking_non_zero(sem, tsk)) /* are we waking up? */ \
break; /* yes, exit loop */
#define DOWN_HEAD(task_state) \
\
\
tsk->state = (task_state); \
add_wait_queue(&sem->wait, &wait); \
\
/* \
* Ok, we're set up. sem->count is known to be less than zero \
* so we must wait. \
* \
* We can let go the lock for purposes of waiting. \
* We re-acquire it after awaking so as to protect \
* all semaphore operations. \
* \
* If "up()" is called before we call waking_non_zero() then \
* we will catch it right away. If it is called later then \
* we will have to go through a wakeup cycle to catch it. \
* \
* Multiple waiters contend for the semaphore lock to see \
* who gets to gate through and who has to wait some more. \
*/ \
for (;;) {
#define DOWN_TAIL(task_state) \
tsk->state = (task_state); \
......@@ -898,6 +897,8 @@ void __down(struct semaphore * sem)
{
DOWN_VAR
DOWN_HEAD(TASK_UNINTERRUPTIBLE)
if (waking_non_zero(sem))
break;
schedule();
DOWN_TAIL(TASK_UNINTERRUPTIBLE)
}
......@@ -907,10 +908,13 @@ int __down_interruptible(struct semaphore * sem)
DOWN_VAR
int ret = 0;
DOWN_HEAD(TASK_INTERRUPTIBLE)
if (signal_pending(tsk))
ret = waking_non_zero_interruptible(sem, tsk);
if (ret)
{
ret = -EINTR; /* interrupted */
atomic_inc(&sem->count); /* give up on down operation */
if (ret == 1)
/* ret != 0 only if we get interrupted -arca */
ret = 0;
break;
}
schedule();
......@@ -918,6 +922,11 @@ int __down_interruptible(struct semaphore * sem)
return ret;
}
int __down_trylock(struct semaphore * sem)
{
return waking_non_zero_trylock(sem);
}
#define SLEEP_ON_VAR \
unsigned long flags; \
struct wait_queue wait;
......
......@@ -11,6 +11,8 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Andi Kleen - Fix a few bad bugs and races.
*/
#include <linux/config.h>
......@@ -36,6 +38,22 @@
#include <asm/uaccess.h>
#include <linux/filter.h>
/* No hurry in this branch */
static u8 *load_pointer(struct sk_buff *skb, int k)
{
u8 *ptr = NULL;
if (k>=SKF_NET_OFF)
ptr = skb->nh.raw + k - SKF_NET_OFF;
else if (k>=SKF_LL_OFF)
ptr = skb->mac.raw + k - SKF_LL_OFF;
if (ptr >= skb->head && ptr < skb->tail)
return ptr;
return NULL;
}
/*
* Decode and apply filter instructions to the skb->data.
* Return length to keep, 0 for none. skb is the data we are
......@@ -43,15 +61,19 @@
* len is the number of filter blocks in the array.
*/
int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
unsigned char *data = skb->data;
/* len is UNSIGNED. Byte wide insns relies only on implicit
type casts to prevent reading arbitrary memory locations.
*/
unsigned int len = skb->len;
struct sock_filter *fentry; /* We walk down these */
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
int k;
int pc;
int *t;
/*
* Process array of filter instructions.
......@@ -60,53 +82,75 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
for(pc = 0; pc < flen; pc++)
{
fentry = &filter[pc];
if(fentry->code & BPF_X)
t=&X;
else
t=&fentry->k;
switch(fentry->code)
{
case BPF_ALU|BPF_ADD|BPF_X:
A += X;
continue;
case BPF_ALU|BPF_ADD|BPF_K:
A += *t;
A += fentry->k;
continue;
case BPF_ALU|BPF_SUB|BPF_X:
A -= X;
continue;
case BPF_ALU|BPF_SUB|BPF_K:
A -= *t;
A -= fentry->k;
continue;
case BPF_ALU|BPF_MUL|BPF_X:
A *= X;
continue;
case BPF_ALU|BPF_MUL|BPF_K:
A *= *t;
A *= X;
continue;
case BPF_ALU|BPF_DIV|BPF_X:
if(X == 0)
return (0);
A /= X;
continue;
case BPF_ALU|BPF_DIV|BPF_K:
if(*t == 0)
if(fentry->k == 0)
return (0);
A /= *t;
A /= fentry->k;
continue;
case BPF_ALU|BPF_AND|BPF_X:
A &= X;
continue;
case BPF_ALU|BPF_AND|BPF_K:
A &= *t;
A &= fentry->k;
continue;
case BPF_ALU|BPF_OR|BPF_X:
A |= X;
continue;
case BPF_ALU|BPF_OR|BPF_K:
A |= *t;
A |= fentry->k;
continue;
case BPF_ALU|BPF_LSH|BPF_X:
A <<= X;
continue;
case BPF_ALU|BPF_LSH|BPF_K:
A <<= *t;
A <<= fentry->k;
continue;
case BPF_ALU|BPF_RSH|BPF_X:
A >>= X;
continue;
case BPF_ALU|BPF_RSH|BPF_K:
A >>= *t;
A >>= fentry->k;
continue;
case BPF_ALU|BPF_NEG:
......@@ -148,26 +192,62 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
case BPF_JMP|BPF_JSET|BPF_X:
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_LD|BPF_W|BPF_ABS:
k = fentry->k;
if(k + sizeof(long) > len)
return (0);
A = ntohl(*(long*)&data[k]);
continue;
load_w:
if(k+sizeof(u32) <= len) {
A = ntohl(*(u32*)&data[k]);
continue;
}
if (k<0) {
u8 *ptr;
if (k>=SKF_AD_OFF)
break;
if ((ptr = load_pointer(skb, k)) != NULL) {
A = ntohl(*(u32*)ptr);
continue;
}
}
return 0;
case BPF_LD|BPF_H|BPF_ABS:
k = fentry->k;
if(k + sizeof(short) > len)
return (0);
A = ntohs(*(short*)&data[k]);
continue;
load_h:
if(k + sizeof(u16) <= len) {
A = ntohs(*(u16*)&data[k]);
continue;
}
if (k<0) {
u8 *ptr;
if (k>=SKF_AD_OFF)
break;
if ((ptr = load_pointer(skb, k)) != NULL) {
A = ntohs(*(u16*)ptr);
continue;
}
}
return 0;
case BPF_LD|BPF_B|BPF_ABS:
k = fentry->k;
if(k >= len)
return (0);
A = data[k];
continue;
load_b:
if(k < len) {
A = data[k];
continue;
}
if (k<0) {
u8 *ptr;
if (k>=SKF_AD_OFF)
break;
if ((ptr = load_pointer(skb, k)) != NULL) {
A = *ptr;
continue;
}
}
case BPF_LD|BPF_W|BPF_LEN:
A = len;
......@@ -177,35 +257,23 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
X = len;
continue;
case BPF_LD|BPF_W|BPF_IND:
case BPF_LD|BPF_W|BPF_IND:
k = X + fentry->k;
if(k + sizeof(u32) > len)
return (0);
A = ntohl(*(u32 *)&data[k]);
continue;
goto load_w;
case BPF_LD|BPF_H|BPF_IND:
k = X + fentry->k;
if(k + sizeof(u16) > len)
return (0);
A = ntohs(*(u16*)&data[k]);
continue;
goto load_h;
case BPF_LD|BPF_B|BPF_IND:
k = X + fentry->k;
if(k >= len)
return (0);
A = data[k];
continue;
goto load_b;
case BPF_LDX|BPF_B|BPF_MSH:
/*
* Hack for BPF to handle TOS etc
*/
k = fentry->k;
if(k >= len)
return (0);
X = (data[fentry->k] & 0xf) << 2;
X = (data[k] & 0xf) << 2;
continue;
case BPF_LD|BPF_IMM:
......@@ -216,7 +284,7 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
X = fentry->k;
continue;
case BPF_LD|BPF_MEM:
case BPF_LD|BPF_MEM:
A = mem[fentry->k];
continue;
......@@ -246,15 +314,29 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
mem[fentry->k] = X;
continue;
default:
/* Invalid instruction counts as RET */
return (0);
}
/* Handle ancillary data, which are impossible
(or very difficult) to get parsing packet contents.
*/
switch (k-SKF_AD_OFF) {
case SKF_AD_PROTOCOL:
A = htons(skb->protocol);
continue;
case SKF_AD_PKTTYPE:
A = skb->pkt_type;
continue;
case SKF_AD_IFINDEX:
A = skb->dev->ifindex;
continue;
default:
return 0;
}
}
printk(KERN_ERR "Filter ruleset ran off the end.\n");
return (0);
}
......@@ -279,13 +361,17 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
ftest = &filter[pc];
if(BPF_CLASS(ftest->code) == BPF_JMP)
{
{
/*
* But they mustn't jump off the end.
*/
if(BPF_OP(ftest->code) == BPF_JA)
{
if(pc + ftest->k + 1>= (unsigned)flen)
/* Note, the large ftest->k might cause
loops. Compare this with conditional
jumps below, where offsets are limited. --ANK (981016)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return (-EINVAL);
}
else
......@@ -302,17 +388,18 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
* Check that memory operations use valid addresses.
*/
if(ftest->k <0 || ftest->k >= BPF_MEMWORDS)
if (ftest->k >= BPF_MEMWORDS)
{
/*
* But it might not be a memory operation...
*/
if (BPF_CLASS(ftest->code) == BPF_ST)
switch (ftest->code) {
case BPF_ST:
case BPF_STX:
case BPF_LD|BPF_MEM:
case BPF_LDX|BPF_MEM:
return -EINVAL;
if((BPF_CLASS(ftest->code) == BPF_LD) &&
(BPF_MODE(ftest->code) == BPF_MEM))
return (-EINVAL);
}
}
}
......@@ -332,35 +419,36 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sock_filter *fp, *old_filter;
int fsize = sizeof(struct sock_filter) * fprog->len;
struct sk_filter *fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
int err;
/* Make sure new filter is there and in the right amounts. */
if(fprog->filter == NULL || fprog->len == 0 || fsize > BPF_MAXINSNS)
if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
return (-EINVAL);
if((err = sk_chk_filter(fprog->filter, fprog->len))==0)
{
/* If existing filter, remove it first */
if(sk->filter)
{
old_filter = sk->filter_data;
kfree_s(old_filter, (sizeof(old_filter) * sk->filter));
sk->filter_data = NULL;
}
fp = (struct sk_filter *)sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
if(fp == NULL)
return (-ENOMEM);
fp = (struct sock_filter *)kmalloc(fsize, GFP_KERNEL);
if(fp == NULL)
return (-ENOMEM);
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
sock_kfree_s(sk, fp, fsize+sizeof(*fp));
return -EFAULT;
}
memset(fp,0,sizeof(*fp));
memcpy(fp, fprog->filter, fsize); /* Copy instructions */
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
sk->filter = fprog->len; /* Number of filter blocks */
sk->filter_data = fp; /* Filter instructions */
if ((err = sk_chk_filter(fp->insns, fp->len))==0) {
struct sk_filter *old_fp = sk->filter;
sk->filter = fp;
wmb();
fp = old_fp;
}
if (fp)
sk_filter_release(sk, fp);
return (err);
}
#endif /* CONFIG_FILTER */
......@@ -155,10 +155,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
int err;
struct linger ling;
int ret = 0;
#ifdef CONFIG_FILTER
struct sock_fprog fprog;
#endif
/*
* Options without arguments
......@@ -256,12 +252,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
case SO_PRIORITY:
if (val >= 0 && val <= 7)
{
if(val==7 && !capable(CAP_NET_ADMIN))
return -EPERM;
sk->priority = val;
else
return(-EINVAL);
}
break;
case SO_LINGER:
if(optlen<sizeof(ling))
return -EINVAL; /* 1003.1g */
......@@ -310,10 +307,12 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (optlen > IFNAMSIZ)
optlen = IFNAMSIZ;
if (copy_from_user(devname, optval, optlen))
return -EFAULT;
return -EFAULT;
/* Remove any cached route for this socket. */
lock_sock(sk);
dst_release(xchg(&sk->dst_cache, NULL));
release_sock(sk);
if (devname[0] == '\0') {
sk->bound_dev_if = 0;
......@@ -331,30 +330,32 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
#ifdef CONFIG_FILTER
case SO_ATTACH_FILTER:
if(optlen < sizeof(struct sock_fprog))
return -EINVAL;
ret = -EINVAL;
if (optlen == sizeof(struct sock_fprog)) {
struct sock_fprog fprog;
if(copy_from_user(&fprog, optval, sizeof(fprog)))
{
ret = -EFAULT;
break;
}
if (copy_from_user(&fprog, optval, sizeof(fprog)))
break;
ret = sk_attach_filter(&fprog, sk);
ret = sk_attach_filter(&fprog, sk);
}
break;
case SO_DETACH_FILTER:
if(sk->filter)
{
fprog.filter = sk->filter_data;
kfree_s(fprog.filter, (sizeof(fprog.filter) * sk->filter));
sk->filter_data = NULL;
sk->filter = 0;
if(sk->filter) {
struct sk_filter *filter;
filter = sk->filter;
sk->filter = NULL;
wmb();
if (filter)
sk_filter_release(sk, filter);
return 0;
}
else
return -EINVAL;
break;
return -ENOENT;
#endif
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
......@@ -504,6 +505,16 @@ void sk_free(struct sock *sk)
if (sk->destruct)
sk->destruct(sk);
#ifdef CONFIG_FILTER
if (sk->filter) {
sk_filter_release(sk, sk->filter);
sk->filter = NULL;
}
#endif
if (atomic_read(&sk->omem_alloc))
printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
kmem_cache_free(sk_cachep, sk);
}
......
......@@ -1323,6 +1323,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
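From user space, the sk_attach_filter() path reworked above (together with the SO_ATTACH_FILTER/SO_DETACH_FILTER handling in sock.c) is driven by setsockopt() with a struct sock_fprog. Here is a minimal example written against today's Linux headers rather than the 2.2-era ones; it attaches a classic BPF program that accepts only IPv4 frames on a packet socket, and needs root to create that socket.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>        /* htons() */
#include <linux/if_ether.h>    /* ETH_P_ALL, ETH_P_IP */
#include <linux/filter.h>      /* struct sock_filter, struct sock_fprog */

#ifndef SO_ATTACH_FILTER
#define SO_ATTACH_FILTER 26    /* value from asm/socket.h, in case the libc headers lack it */
#endif

int main(void)
{
    /* classic BPF: "ldh [12]; jeq #ETH_P_IP, accept, drop" */
    struct sock_filter insns[] = {
        { BPF_LD  | BPF_H   | BPF_ABS, 0, 0, 12       },  /* A = EtherType  */
        { BPF_JMP | BPF_JEQ | BPF_K,   0, 1, ETH_P_IP },  /* IPv4?          */
        { BPF_RET | BPF_K,             0, 0, 0xffff   },  /* accept packet  */
        { BPF_RET | BPF_K,             0, 0, 0        },  /* drop           */
    };
    struct sock_fprog prog = {
        .len    = sizeof(insns) / sizeof(insns[0]),
        .filter = insns,
    };

    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0) {
        perror("socket");
        return 1;
    }
    if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0) {
        perror("setsockopt(SO_ATTACH_FILTER)");
        close(fd);
        return 1;
    }
    printf("filter attached: only IPv4 frames will be delivered to this socket\n");
    close(fd);
    return 0;
}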
newsk->pair = NULL;
skb_queue_head_init(&newsk->back_log);
skb_queue_head_init(&newsk->error_queue);
#ifdef CONFIG_FILTER
if (newsk->filter)
sk_filter_charge(newsk, newsk->filter);
#endif
/* Now setup tcp_opt */
newtp = &(newsk->tp_pinfo.af_tcp);
......@@ -1553,12 +1557,10 @@ static inline struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb)
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_FILTER
if (sk->filter)
{
if (sk_filter(skb, sk->filter_data, sk->filter))
goto discard;
}
if (sk->filter && sk_filter(skb, sk->filter))
goto discard;
#endif /* CONFIG_FILTER */
/*
......
......@@ -30,6 +30,7 @@
* David S. Miller : Charge memory using the right skb
* during syn/ack processing.
* David S. Miller : Output engine completely rewritten.
* Andrea Arcangeli: SYNACK carry ts_recent in tsecr.
*
*/
......@@ -135,7 +136,8 @@ void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
(sysctl_flags & SYSCTL_FLAG_SACK),
(sysctl_flags & SYSCTL_FLAG_WSCALE),
tp->rcv_wscale,
TCP_SKB_CB(skb)->when);
TCP_SKB_CB(skb)->when,
tp->ts_recent);
} else {
tcp_build_and_update_options((__u32 *)(th + 1),
tp, TCP_SKB_CB(skb)->when);
......@@ -862,7 +864,8 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCP_SKB_CB(skb)->when = jiffies;
tcp_syn_build_options((__u32 *)(th + 1), req->mss, req->tstamp_ok,
req->sack_ok, req->wscale_ok, req->rcv_wscale,
TCP_SKB_CB(skb)->when);
TCP_SKB_CB(skb)->when,
req->ts_recent);
skb->csum = 0;
th->doff = (tcp_header_size >> 2);
......
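With the change above, tcp_syn_build_options() now echoes the peer's most recent timestamp (ts_recent) in the TSECR field of a SYN-ACK instead of sending zero. A small sketch of the three 32-bit words the timestamp branch writes; the option constants match the RFC 1323 values, while the tstamp/ts_recent numbers are invented.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>         /* htonl(), ntohl() */

#define TCPOPT_NOP        1
#define TCPOPT_TIMESTAMP  8
#define TCPOLEN_TIMESTAMP 10

int main(void)
{
    uint32_t opt[3];
    uint32_t tstamp = 123456, ts_recent = 654321;   /* illustrative values */

    opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
    opt[1] = htonl(tstamp);        /* TSVAL: our own timestamp clock      */
    opt[2] = htonl(ts_recent);     /* TSECR: peer's value, no longer zero */

    printf("option words: %08x %08x %08x\n",
           (unsigned)ntohl(opt[0]), (unsigned)ntohl(opt[1]), (unsigned)ntohl(opt[2]));
    return 0;
}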
......@@ -75,8 +75,7 @@ void net_timer (unsigned long data)
/* Only process if socket is not in use. */
if (atomic_read(&sk->sock_readers)) {
/* Try again later. */
sk->timer.expires = jiffies+HZ/20;
add_timer(&sk->timer);
mod_timer(&sk->timer, jiffies+HZ/20);
return;
}
......