Commit 351ae16b authored by Linus Torvalds

Import 2.2.2pre5

parent 724170c9
This source diff could not be displayed because it is too large.
......@@ -241,7 +241,7 @@ CONFIG_EEXPRESS_PRO100=y
# CONFIG_ISDN is not set
#
# CD-ROM drivers (not for SCSI or IDE/ATAPI drives)
# Old CD-ROM drivers (not SCSI, not IDE)
#
# CONFIG_CD_NO_IDESCSI is not set
......
......@@ -43,6 +43,7 @@ EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL_NOVERS(__down_failed);
EXPORT_SYMBOL_NOVERS(__down_failed_interruptible);
EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
EXPORT_SYMBOL_NOVERS(__up_wakeup);
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy);
......
......@@ -569,6 +569,9 @@ static int __init assign_irq_vector(int irq)
printk("WARNING: ASSIGN_IRQ_VECTOR wrapped back to %02X\n",
current_vector);
}
if (current_vector == SYSCALL_VECTOR)
panic("ran out of interrupt sources!");
IO_APIC_VECTOR(irq) = current_vector;
return current_vector;
}
......@@ -693,9 +696,11 @@ void __init print_IO_APIC(void)
printk(".... register #01: %08X\n", *(int *)&reg_01);
printk("....... : max redirection entries: %04X\n", reg_01.entries);
if ( (reg_01.entries != 0x0f) && /* ISA-only Neptune boards */
(reg_01.entries != 0x17) && /* ISA+PCI boards */
(reg_01.entries != 0x3F) /* Xeon boards */
if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */
(reg_01.entries != 0x17) && /* typical ISA+PCI boards */
(reg_01.entries != 0x1b) && /* Compaq Proliant boards */
(reg_01.entries != 0x1f) && /* dual Xeon boards */
(reg_01.entries != 0x3F) /* bigger Xeon boards */
)
UNEXPECTED_IO_APIC();
if (reg_01.entries == 0x0f)
......@@ -1163,7 +1168,7 @@ static inline void init_IO_APIC_traps(void)
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
for (i = 0; i < NR_IRQS ; i++) {
if (IO_APIC_IRQ(i)) {
if (IO_APIC_VECTOR(i) > 0) {
if (IO_APIC_irq_trigger(i))
irq_desc[i].handler = &ioapic_level_irq_type;
else
......@@ -1173,8 +1178,15 @@ static inline void init_IO_APIC_traps(void)
*/
if (i < 16)
disable_8259A_irq(i);
}
} else
/*
* we have no business changing low ISA
* IRQs.
*/
if (IO_APIC_IRQ(i))
irq_desc[i].handler = &no_irq_type;
}
init_IRQ_SMP();
}
/*
......@@ -1278,14 +1290,12 @@ void __init setup_IO_APIC(void)
construct_default_ISA_mptable();
}
init_IO_APIC_traps();
/*
* Set up the IO-APIC IRQ routing table by parsing the MP-BIOS
* mptable:
*/
setup_IO_APIC_irqs();
init_IRQ_SMP();
init_IO_APIC_traps();
check_timer();
print_IO_APIC();
......
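The assign_irq_vector() hunk above hands out IDT vectors for IO-APIC interrupts and, with this change, refuses to collide with the system-call vector and panics when the vector space is exhausted. Below is a minimal userspace sketch of that allocation policy; the starting vector, the step of 8 and the wrap rule are assumptions made for illustration, not copied from the kernel.

#include <stdio.h>
#include <stdlib.h>

#define FIRST_DEVICE_VECTOR 0x31     /* assumed starting point for the demo */
#define SYSCALL_VECTOR      0x80     /* int 0x80 must never be handed out   */

static int assign_vector_demo(void)
{
    static int current_vector = FIRST_DEVICE_VECTOR;

    current_vector += 8;                      /* spread vectors across priority groups */
    if (current_vector > 0xFE) {              /* wrapped past the top of the IDT */
        current_vector = FIRST_DEVICE_VECTOR + 1;
        fprintf(stderr, "warning: vector allocation wrapped back to 0x%02x\n",
                current_vector);
    }
    if (current_vector == SYSCALL_VECTOR) {   /* the new check from the hunk above */
        fprintf(stderr, "ran out of interrupt vectors\n");
        exit(1);
    }
    return current_vector;
}

int main(void)
{
    for (int irq = 0; irq < 5; irq++)
        printf("irq %d -> vector 0x%02x\n", irq, assign_vector_demo());
    return 0;
}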
......@@ -70,11 +70,34 @@ atomic_t nmi_counter;
*/
spinlock_t irq_controller_lock;
/*
* Dummy controller type for unused interrupts
*/
static void do_none(unsigned int irq, struct pt_regs * regs) { }
static void do_none(unsigned int irq, struct pt_regs * regs)
{
/*
* we are careful. While for ISA irqs it's common to happen
* outside of any driver (think autodetection), this is not
* at all nice for PCI interrupts. So we are stricter and
* print a warning when such spurious interrupts happen.
* Spurious interrupts can confuse other drivers if the PCI
* IRQ line is shared.
*
* Such spurious interrupts are either driver bugs, or
* sometimes hw (chipset) bugs.
*/
printk("unexpected IRQ vector %d on CPU#%d!\n",irq, smp_processor_id());
#ifdef __SMP__
/*
* [currently unexpected vectors happen only on SMP and APIC.
* if we want to have non-APIC and non-8259A controllers
* in the future with unexpected vectors, this ack should
* probably be made controller-specific.]
*/
ack_APIC_irq();
#endif
}
static void enable_none(unsigned int irq) { }
static void disable_none(unsigned int irq) { }
......@@ -82,7 +105,7 @@ static void disable_none(unsigned int irq) { }
#define startup_none enable_none
#define shutdown_none disable_none
static struct hw_interrupt_type no_irq_type = {
struct hw_interrupt_type no_irq_type = {
"none",
startup_none,
shutdown_none,
......@@ -141,10 +164,10 @@ static unsigned int cached_irq_mask = 0xffff;
* fed to the CPU IRQ line directly.
*
* Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
* this 'mixed mode' IRQ handling costs us one more branch in do_IRQ,
* but we have _much_ higher compatibility and robustness this way.
* this 'mixed mode' IRQ handling costs nothing because it's only used
* at IRQ setup time.
*/
unsigned long long io_apic_irqs = 0;
unsigned long io_apic_irqs = 0;
/*
* These have to be protected by the irq controller spinlock
......@@ -254,32 +277,43 @@ static void do_8259A_IRQ(unsigned int irq, struct pt_regs * regs)
BUILD_COMMON_IRQ()
#define BI(x,y) \
BUILD_IRQ(##x##y)
#define BUILD_16_IRQS(x) \
BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
BI(x,c) BI(x,d) BI(x,e) BI(x,f)
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
* (these are usually mapped to vectors 0x20-0x30)
*/
BUILD_IRQ(0) BUILD_IRQ(1) BUILD_IRQ(2) BUILD_IRQ(3)
BUILD_IRQ(4) BUILD_IRQ(5) BUILD_IRQ(6) BUILD_IRQ(7)
BUILD_IRQ(8) BUILD_IRQ(9) BUILD_IRQ(10) BUILD_IRQ(11)
BUILD_IRQ(12) BUILD_IRQ(13) BUILD_IRQ(14) BUILD_IRQ(15)
BUILD_16_IRQS(0x0)
#ifdef CONFIG_X86_IO_APIC
/*
* The IO-APIC gives us many more interrupt sources..
* The IO-APIC gives us many more interrupt sources. Most of these
* are unused but an SMP system is supposed to have enough memory ...
* sometimes (mostly wrt. hw bugs) we get corrupted vectors all
* across the spectrum, so we really want to be prepared to get all
* of these. Plus, more powerful systems might have more than 64
* IO-APIC registers.
*
* (these are usually mapped into the 0x30-0xff vector range)
*/
BUILD_IRQ(16) BUILD_IRQ(17) BUILD_IRQ(18) BUILD_IRQ(19)
BUILD_IRQ(20) BUILD_IRQ(21) BUILD_IRQ(22) BUILD_IRQ(23)
BUILD_IRQ(24) BUILD_IRQ(25) BUILD_IRQ(26) BUILD_IRQ(27)
BUILD_IRQ(28) BUILD_IRQ(29) BUILD_IRQ(30) BUILD_IRQ(31)
BUILD_IRQ(32) BUILD_IRQ(33) BUILD_IRQ(34) BUILD_IRQ(35)
BUILD_IRQ(36) BUILD_IRQ(37) BUILD_IRQ(38) BUILD_IRQ(39)
BUILD_IRQ(40) BUILD_IRQ(41) BUILD_IRQ(42) BUILD_IRQ(43)
BUILD_IRQ(44) BUILD_IRQ(45) BUILD_IRQ(46) BUILD_IRQ(47)
BUILD_IRQ(48) BUILD_IRQ(49) BUILD_IRQ(50) BUILD_IRQ(51)
BUILD_IRQ(52) BUILD_IRQ(53) BUILD_IRQ(54) BUILD_IRQ(55)
BUILD_IRQ(56) BUILD_IRQ(57) BUILD_IRQ(58) BUILD_IRQ(59)
BUILD_IRQ(60) BUILD_IRQ(61) BUILD_IRQ(62) BUILD_IRQ(63)
BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
#endif
#undef BUILD_16_IRQS
#undef BI
#ifdef __SMP__
/*
* The following vectors are part of the Linux architecture, there
......@@ -303,37 +337,35 @@ BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt)
#endif
#define IRQ(x,y) \
IRQ##x##y##_interrupt
#define IRQLIST_16(x) \
IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
static void (*interrupt[NR_IRQS])(void) = {
IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt,
IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt,
IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt,
IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt
IRQLIST_16(0x0),
#ifdef CONFIG_X86_IO_APIC
,IRQ16_interrupt, IRQ17_interrupt, IRQ18_interrupt, IRQ19_interrupt,
IRQ20_interrupt, IRQ21_interrupt, IRQ22_interrupt, IRQ23_interrupt,
IRQ24_interrupt, IRQ25_interrupt, IRQ26_interrupt, IRQ27_interrupt,
IRQ28_interrupt, IRQ29_interrupt,
IRQ30_interrupt, IRQ31_interrupt, IRQ32_interrupt, IRQ33_interrupt,
IRQ34_interrupt, IRQ35_interrupt, IRQ36_interrupt, IRQ37_interrupt,
IRQ38_interrupt, IRQ39_interrupt,
IRQ40_interrupt, IRQ41_interrupt, IRQ42_interrupt, IRQ43_interrupt,
IRQ44_interrupt, IRQ45_interrupt, IRQ46_interrupt, IRQ47_interrupt,
IRQ48_interrupt, IRQ49_interrupt,
IRQ50_interrupt, IRQ51_interrupt, IRQ52_interrupt, IRQ53_interrupt,
IRQ54_interrupt, IRQ55_interrupt, IRQ56_interrupt, IRQ57_interrupt,
IRQ58_interrupt, IRQ59_interrupt,
IRQ60_interrupt, IRQ61_interrupt, IRQ62_interrupt, IRQ63_interrupt
IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
IRQLIST_16(0xc), IRQLIST_16(0xd)
#endif
};
#undef IRQ
#undef IRQLIST_16
/*
* Initial irq handlers.
* Special irq handlers.
*/
void no_action(int cpl, void *dev_id, struct pt_regs *regs)
{
}
void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
#ifndef CONFIG_VISWS
/*
......@@ -770,7 +802,7 @@ asmlinkage void do_IRQ(struct pt_regs regs)
* 0 return value means that this irq is already being
* handled by some other CPU. (or is disabled)
*/
unsigned int irq = regs.orig_eax & 0xff;
int irq = regs.orig_eax & 0xff; /* subtle, see irq.h */
int cpu = smp_processor_id();
kstat.irqs[cpu][irq]++;
......@@ -986,42 +1018,6 @@ int probe_irq_off(unsigned long unused)
return irq_found;
}
/*
* Silly, horrible hack
*/
static char uglybuffer[10*256];
__asm__("\n" __ALIGN_STR"\n"
"common_unexpected:\n\t"
SAVE_ALL
"pushl $ret_from_intr\n\t"
"jmp strange_interrupt");
void strange_interrupt(int irqnum)
{
printk("Unexpected interrupt %d\n", irqnum & 255);
for (;;);
}
extern int common_unexpected;
__initfunc(void init_unexpected_irq(void))
{
int i;
for (i = 0; i < 256; i++) {
char *code = uglybuffer + 10*i;
unsigned long jumpto = (unsigned long) &common_unexpected;
jumpto -= (unsigned long)(code+10);
code[0] = 0x68; /* pushl */
*(int *)(code+1) = i - 512;
code[5] = 0xe9; /* jmp */
*(int *)(code+6) = jumpto;
set_intr_gate(i,code);
}
}
void init_ISA_irqs (void)
{
int i;
......@@ -1033,7 +1029,7 @@ void init_ISA_irqs (void)
if (i < 16) {
/*
* 16 old-style INTA-cycle interrupt gates:
* 16 old-style INTA-cycle interrupts:
*/
irq_desc[i].handler = &i8259A_irq_type;
} else {
......@@ -1054,9 +1050,16 @@ __initfunc(void init_IRQ(void))
#else
init_VISWS_APIC_irqs();
#endif
for (i = 0; i < 16; i++)
set_intr_gate(0x20+i,interrupt[i]);
/*
* Cover the whole vector space, no vector can escape
* us. (some of these will be overridden and become
* 'special' SMP interrupts)
*/
for (i = 0; i < NR_IRQS; i++) {
int vector = FIRST_EXTERNAL_VECTOR + i;
if (vector != SYSCALL_VECTOR)
set_intr_gate(vector, interrupt[i]);
}
#ifdef __SMP__
......@@ -1067,13 +1070,9 @@ __initfunc(void init_IRQ(void))
set_intr_gate(IRQ0_TRAP_VECTOR, interrupt[0]);
/*
* The reschedule interrupt slowly changes it's functionality,
* while so far it was a kind of broadcasted timer interrupt,
* in the future it should become a CPU-to-CPU rescheduling IPI,
* driven by schedule() ?
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
* IPI, driven by wakeup.
*/
/* IPI for rescheduling */
set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPI for invalidation */
......
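The irq.c hunks above replace 64 hand-written BUILD_IRQ() lines with the BUILD_16_IRQS()/IRQLIST_16() macros, which stamp out one low-level stub per vector in rows of 16 using token pasting. Here is a stand-alone sketch of the same preprocessor trick; the demo_irq* names and the printf bodies are invented for the example and are not the kernel's BUILD_IRQ expansion.

#include <stdio.h>

#define BI(x,y) \
    static void demo_irq##x##y##_handler(void) \
    { printf("handler for vector 0x" #x #y "\n"); }

#define BUILD_16_IRQS(x) \
    BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
    BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
    BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
    BI(x,c) BI(x,d) BI(x,e) BI(x,f)

BUILD_16_IRQS(0)    /* defines demo_irq00_handler() .. demo_irq0f_handler() */

int main(void)
{
    demo_irq00_handler();
    demo_irq0f_handler();
    return 0;
}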
......@@ -16,6 +16,7 @@ struct hw_interrupt_type {
void (*disable)(unsigned int irq);
};
extern struct hw_interrupt_type no_irq_type;
/*
* IRQ line status.
......@@ -40,6 +41,18 @@ typedef struct {
unsigned int depth; /* Disable depth for nested irq disables */
} irq_desc_t;
/*
* IDT vectors usable for external interrupt sources start
* at 0x20:
*/
#define FIRST_EXTERNAL_VECTOR 0x20
#define SYSCALL_VECTOR 0x80
/*
* Vectors 0x20-0x2f are used for ISA interrupts.
*/
/*
* Special IRQ vectors used by the SMP architecture:
*
......@@ -54,7 +67,7 @@ typedef struct {
#define MTRR_CHANGE_VECTOR 0x50
/*
* First vector available to drivers: (vectors 0x51-0xfe)
* First APIC vector available to drivers: (vectors 0x51-0xfe)
*/
#define IRQ0_TRAP_VECTOR 0x51
......@@ -94,7 +107,9 @@ extern void send_IPI(int dest, int vector);
extern void init_pic_mode(void);
extern void print_IO_APIC(void);
extern unsigned long long io_apic_irqs;
extern unsigned long io_apic_irqs;
extern char _stext, _etext;
#define MAX_IRQ_SOURCES 128
#define MAX_MP_BUSSES 32
......@@ -126,7 +141,7 @@ static inline void irq_exit(int cpu, unsigned int irq)
hardirq_exit(cpu);
}
#define IO_APIC_IRQ(x) ((1<<x) & io_apic_irqs)
#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
#else
......@@ -201,6 +216,13 @@ __asm__( \
"pushl $ret_from_intr\n\t" \
"jmp "SYMBOL_NAME_STR(do_IRQ));
/*
* subtle. orig_eax is used by the signal code to distinct between
* system calls and interrupted 'random user-space'. Thus we have
* to put a negative value into orig_eax here. (the problem is that
* both system calls and IRQs want to have small integer numbers in
* orig_eax, and the syscall code has won the optimization conflict ;)
*/
#define BUILD_IRQ(nr) \
asmlinkage void IRQ_NAME(nr); \
__asm__( \
......@@ -216,7 +238,6 @@ SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
static inline void x86_do_profile (unsigned long eip)
{
if (prof_buffer && current->pid) {
extern int _stext;
eip -= (unsigned long) &_stext;
eip >>= prof_shift;
/*
......
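The new IO_APIC_IRQ() definition above treats every IRQ at or above 16 as IO-APIC routed and consults the io_apic_irqs bitmask only for the 16 legacy ISA lines. A small userspace check of that expression; the mask value below is made up for the demo.

#include <stdio.h>

static unsigned long io_apic_irqs = 0xfffa;   /* assumed mask: ISA IRQs except 0 and 2 */

#define IO_APIC_IRQ(x) (((x) >= 16) || ((1 << (x)) & io_apic_irqs))

int main(void)
{
    int probes[] = { 0, 2, 5, 13, 16, 19, 200 };
    int n = sizeof(probes) / sizeof(probes[0]);

    for (int i = 0; i < n; i++)
        printf("IRQ %3d routed through the IO-APIC: %s\n",
               probes[i], IO_APIC_IRQ(probes[i]) ? "yes" : "no");
    return 0;
}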
......@@ -807,7 +807,7 @@ int get_cpuinfo(char * buffer)
c->x86_model,
c->x86_model_id[0] ? c->x86_model_id : "unknown");
if (c->x86_mask)
if (c->x86_mask || c->cpuid_level >= 0)
p += sprintf(p, "stepping\t: %d\n", c->x86_mask);
else
p += sprintf(p, "stepping\t: unknown\n");
......
......@@ -42,7 +42,7 @@
#include "irq.h"
extern unsigned long start_kernel, _etext;
extern unsigned long start_kernel;
extern void update_one_process( struct task_struct *p,
unsigned long ticks, unsigned long user,
unsigned long system, int cpu);
......@@ -319,8 +319,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
printk("Processor #%d unused. (Max %d processors).\n",m->mpc_apicid, NR_CPUS);
else
{
int ver = m->mpc_apicver;
cpu_present_map|=(1<<m->mpc_apicid);
apic_version[m->mpc_apicid]=m->mpc_apicver;
/*
* Validate version
*/
if (ver == 0x0) {
printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
ver = 0x10;
}
apic_version[m->mpc_apicid] = ver;
}
}
mpt+=sizeof(*m);
......@@ -1806,8 +1815,10 @@ asmlinkage void smp_mtrr_interrupt(void)
*/
asmlinkage void smp_spurious_interrupt(void)
{
/* ack_APIC_irq(); see sw-dev-man vol 3, chapter 7.4.13.5 */
printk("spurious APIC interrupt, ayiee, should never happen.\n");
ack_APIC_irq();
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
smp_processor_id());
}
/*
......@@ -2058,3 +2069,4 @@ int setup_profiling_timer(unsigned int multiplier)
}
#undef APIC_DIVISOR
......@@ -42,6 +42,8 @@
#include <asm/lithium.h>
#endif
#include "irq.h"
asmlinkage int system_call(void);
asmlinkage void lcall7(void);
......@@ -125,7 +127,6 @@ static void show_registers(struct pt_regs *regs)
unsigned long esp;
unsigned short ss;
unsigned long *stack, addr, module_start, module_end;
extern char _stext, _etext;
esp = (unsigned long) (1+regs);
ss = __KERNEL_DS;
......@@ -669,9 +670,6 @@ cobalt_init(void)
#endif
void __init trap_init(void)
{
/* Initially up all of the IDT to jump to unexpected */
init_unexpected_irq();
if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
EISA_bus = 1;
set_call_gate(&default_ldt,lcall7);
......@@ -693,7 +691,7 @@ void __init trap_init(void)
set_trap_gate(15,&spurious_interrupt_bug);
set_trap_gate(16,&coprocessor_error);
set_trap_gate(17,&alignment_check);
set_system_gate(0x80,&system_call);
set_system_gate(SYSCALL_VECTOR,&system_call);
/* set up GDT task & ldt entries */
set_tss_desc(0, &init_task.tss);
......
......@@ -31,6 +31,15 @@ ENTRY(__down_failed_interruptible)
popl %edx /* restore %edx */
ret
/* Don't save/restore %eax, because that will be our return value */
ENTRY(__down_failed_trylock)
pushl %edx /* save %edx */
pushl %ecx /* save %ecx (and argument) */
call SYMBOL_NAME(__down_trylock)
popl %ecx /* restore %ecx (count on __down_trylock not changing it) */
popl %edx /* restore %edx */
ret
ENTRY(__up_wakeup)
pushl %eax /* save %eax */
pushl %edx /* save %edx */
......
......@@ -120,6 +120,7 @@ static struct bttv bttvs[BTTV_MAX];
#define I2C_TIMING (0x7<<4)
#define I2C_DELAY 10
#define I2C_SET(CTRL,DATA) \
{ btwrite((CTRL<<1)|(DATA), BT848_I2C); udelay(I2C_DELAY); }
#define I2C_GET() (btread(BT848_I2C)&1)
......@@ -244,6 +245,7 @@ static void i2c_setlines(struct i2c_bus *bus,int ctrl,int data)
{
struct bttv *btv = (struct bttv*)bus->data;
btwrite((ctrl<<1)|data, BT848_I2C);
btread(BT848_I2C); /* flush buffers */
udelay(I2C_DELAY);
}
......
......@@ -774,7 +774,6 @@ static int msp3410d_thread(void *data)
goto done;
dprintk("msp3410: thread: sleep\n");
down_interruptible(&sem);
sem.owner = 0;
dprintk("msp3410: thread: wakeup\n");
if (msp->rmmod)
goto done;
......
......@@ -731,9 +731,6 @@ int idescsi_queue (Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
printk (KERN_ERR "ide-scsi: drive id %d not present\n", cmd->target);
goto abort;
}
if (cmd->lun != 0) { /* Only respond to LUN 0. Drop others */
goto abort;
}
scsi = drive->driver_data;
pc = kmalloc (sizeof (idescsi_pc_t), GFP_ATOMIC);
rq = kmalloc (sizeof (struct request), GFP_ATOMIC);
......
......@@ -1972,7 +1972,6 @@ scsi_error_handler(void * data)
*/
SCSI_LOG_ERROR_RECOVERY(1,printk("Error handler sleeping\n"));
down_interruptible (&sem);
sem.owner = 0;
if (signal_pending(current) )
break;
......
......@@ -385,7 +385,9 @@ asmlinkage int sys_fdatasync(unsigned int fd)
goto out_putf;
/* this needs further work, at the moment it is identical to fsync() */
down(&inode->i_sem);
err = file->f_op->fsync(file, dentry);
up(&inode->i_sem);
out_putf:
fput(file);
......@@ -812,8 +814,8 @@ void refile_buffer(struct buffer_head * buf)
* If too high a percentage of the buffers are dirty...
*/
if (nr_buffers_type[BUF_DIRTY] > too_many ||
(size_buffers_type[BUF_DIRTY] + size_buffers_type[BUF_LOCKED])/PAGE_SIZE > too_large) {
if (nr_buffers_type[BUF_LOCKED] > 2 * bdf_prm.b_un.ndirty)
size_buffers_type[BUF_DIRTY]/PAGE_SIZE > too_large) {
if (nr_buffers_type[BUF_LOCKED] > 3 * bdf_prm.b_un.ndirty)
wakeup_bdflush(1);
else
wakeup_bdflush(0);
......@@ -1767,7 +1769,7 @@ int bdflush(void * unused)
#ifdef DEBUG
for(nlist = 0; nlist < NR_LIST; nlist++)
#else
for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
#endif
{
ndirty = 0;
......@@ -1786,11 +1788,16 @@ int bdflush(void * unused)
}
/* Clean buffer on dirty list? Refile it */
if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
{
refile_buffer(bh);
continue;
}
if (nlist == BUF_DIRTY && !buffer_dirty(bh)) {
refile_buffer(bh);
continue;
}
/* Unlocked buffer on locked list? Refile it */
if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
refile_buffer(bh);
continue;
}
if (buffer_locked(bh) || !buffer_dirty(bh))
continue;
......
1999-01-30 a sun <asun@hecate.darksunrising.blah>
* catalog.c (hfs_cat_move): fixed corruption problem with
renames.
1999-01-27 a sun <asun@hecate.darksunrising.blah>
* file_hdr.c (get/set_dates): got rid of broken afpd times. NOTE:
you must use netatalk-1.4b2+asun2.1.2 or newer for this.
1998-12-20 a sun <asun@hecate.darksunrising.blah>
* bdelete.c (del_root): assign bthLNode and bthFNode only if the
......
......@@ -1348,7 +1348,7 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
hfs_sleep_on(&mdb->rename_wait);
}
spin_lock(&entry_lock);
mdb->rename_lock = 1;
mdb->rename_lock = 1; /* XXX: should be atomic_inc */
spin_unlock(&entry_lock);
/* keep readers from getting confused by changing dir size */
......@@ -1385,7 +1385,6 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
restart:
/* see if the destination exists, getting it if it does */
dest = hfs_cat_get(mdb, new_key);
if (!dest) {
/* destination doesn't exist, so create it */
struct hfs_cat_rec new_record;
......@@ -1408,14 +1407,16 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
goto bail3;
}
/* build the new record */
/* build the new record. make sure to zero out the
record. */
memset(&new_record, 0, sizeof(new_record));
new_record.cdrType = entry->type;
__write_entry(entry, &new_record);
/* insert the new record */
error = hfs_binsert(mdb->cat_tree, HFS_BKEY(new_key),
&new_record, is_dir ? 2 + sizeof(DIR_REC) :
2 + sizeof(FIL_REC));
2 + sizeof(FIL_REC));
if (error == -EEXIST) {
delete_entry(dest);
unlock_entry(dest);
......@@ -1565,7 +1566,7 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
}
end_write(new_dir);
spin_lock(&entry_lock);
mdb->rename_lock = 0;
mdb->rename_lock = 0; /* XXX: should use atomic_dec */
hfs_wake_up(&mdb->rename_wait);
spin_unlock(&entry_lock);
......
......@@ -303,16 +303,9 @@ static inline void adjust_forks(struct hfs_cat_entry *entry,
static void get_dates(const struct hfs_cat_entry *entry,
const struct inode *inode, hfs_u32 dates[3])
{
if (HFS_SB(inode->i_sb)->s_afpd) {
/* AFPD compatible: use un*x times */
dates[0] = htonl(hfs_m_to_utime(entry->create_date));
dates[1] = htonl(hfs_m_to_utime(entry->modify_date));
dates[2] = htonl(hfs_m_to_utime(entry->backup_date));
} else {
dates[0] = hfs_m_to_htime(entry->create_date);
dates[1] = hfs_m_to_htime(entry->modify_date);
dates[2] = hfs_m_to_htime(entry->backup_date);
}
dates[0] = hfs_m_to_htime(entry->create_date);
dates[1] = hfs_m_to_htime(entry->modify_date);
dates[2] = hfs_m_to_htime(entry->backup_date);
}
/*
......@@ -322,43 +315,23 @@ static void set_dates(struct hfs_cat_entry *entry, struct inode *inode,
const hfs_u32 *dates)
{
hfs_u32 tmp;
if (HFS_SB(inode->i_sb)->s_afpd) {
/* AFPD compatible: use un*x times */
tmp = hfs_u_to_mtime(ntohl(dates[0]));
if (entry->create_date != tmp) {
entry->create_date = tmp;
hfs_cat_mark_dirty(entry);
}
tmp = hfs_u_to_mtime(ntohl(dates[1]));
if (entry->modify_date != tmp) {
entry->modify_date = tmp;
inode->i_ctime = inode->i_atime = inode->i_mtime =
ntohl(dates[1]);
hfs_cat_mark_dirty(entry);
}
tmp = hfs_u_to_mtime(ntohl(dates[2]));
if (entry->backup_date != tmp) {
entry->backup_date = tmp;
hfs_cat_mark_dirty(entry);
}
} else {
tmp = hfs_h_to_mtime(dates[0]);
if (entry->create_date != tmp) {
entry->create_date = tmp;
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[1]);
if (entry->modify_date != tmp) {
entry->modify_date = tmp;
inode->i_ctime = inode->i_atime = inode->i_mtime =
hfs_h_to_utime(dates[1]);
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[2]);
if (entry->backup_date != tmp) {
entry->backup_date = tmp;
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[0]);
if (entry->create_date != tmp) {
entry->create_date = tmp;
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[1]);
if (entry->modify_date != tmp) {
entry->modify_date = tmp;
inode->i_ctime = inode->i_atime = inode->i_mtime =
hfs_h_to_utime(dates[1]);
hfs_cat_mark_dirty(entry);
}
tmp = hfs_h_to_mtime(dates[2]);
if (entry->backup_date != tmp) {
entry->backup_date = tmp;
hfs_cat_mark_dirty(entry);
}
}
......
......@@ -478,7 +478,7 @@ nlmclnt_unlock_callback(struct rpc_task *task)
int status = req->a_res.status;
if (RPC_ASSASSINATED(task))
return;
goto die;
if (task->tk_status < 0) {
dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status);
......@@ -490,6 +490,9 @@ nlmclnt_unlock_callback(struct rpc_task *task)
&& status != NLM_LCK_DENIED_GRACE_PERIOD) {
printk("lockd: unexpected unlock status: %d\n", status);
}
die:
rpc_release_task(task);
}
/*
......@@ -565,6 +568,7 @@ nlmclnt_cancel_callback(struct rpc_task *task)
}
die:
rpc_release_task(task);
nlm_release_host(req->a_host);
kfree(req);
return;
......
......@@ -561,6 +561,7 @@ nlmsvc_grant_callback(struct rpc_task *task)
block->b_incall = 0;
nlm_release_host(call->a_host);
rpc_release_task(task);
}
/*
......
......@@ -492,6 +492,7 @@ nlmsvc_callback_exit(struct rpc_task *task)
task->tk_pid, -task->tk_status);
}
nlm_release_host(call->a_host);
rpc_release_task(task);
kfree(call);
}
......
......@@ -734,7 +734,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
* directories via NFS.
*/
err = 0;
if ((iap->ia_valid &= (ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
err = nfsd_setattr(rqstp, resfhp, iap);
out:
return err;
......
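The one-character nfsd_create() fix above turns "iap->ia_valid &= (ATTR_UID|ATTR_GID|ATTR_MODE)" into "&= ~(ATTR_UID|ATTR_GID|ATTR_MODE)": instead of keeping only the ownership and mode bits, it now clears them and leaves the remaining attributes (size, times, ...) for the follow-up nfsd_setattr() call. A toy illustration of the two masks; the ATTR_* values mirror the usual kernel definitions but are only for this demo.

#include <stdio.h>

#define ATTR_MODE  0x001
#define ATTR_UID   0x002
#define ATTR_GID   0x004
#define ATTR_SIZE  0x008
#define ATTR_MTIME 0x020

int main(void)
{
    unsigned int ia_valid = ATTR_MODE | ATTR_SIZE | ATTR_MTIME;

    unsigned int old_mask = ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE);
    unsigned int new_mask = ia_valid & ~(ATTR_UID | ATTR_GID | ATTR_MODE);

    printf("old test keeps 0x%03x (only owner/mode bits, the wrong set)\n", old_mask);
    printf("new test keeps 0x%03x (size/time bits that still need setattr)\n", new_mask);
    return 0;
}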
......@@ -144,7 +144,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, count, offset, wsize);
result = smb_proc_write(dentry, offset, wsize, buffer);
if (result < 0)
goto io_error;
break;
/* N.B. what if result < wsize?? */
#ifdef SMBFS_PARANOIA
if (result < wsize)
......@@ -162,15 +162,7 @@ printk("smb_writepage_sync: short write, wsize=%d, result=%d\n", wsize, result);
inode->i_size = offset;
inode->u.smbfs_i.cache_valid |= SMB_F_LOCALWRITE;
} while (count);
out:
smb_unlock_page(page);
return written ? written : result;
io_error:
/* Must mark the page invalid after I/O error */
clear_bit(PG_uptodate, &page->flags);
goto out;
}
/*
......@@ -190,6 +182,7 @@ smb_writepage(struct file *file, struct page *page)
set_bit(PG_locked, &page->flags);
atomic_inc(&page->count);
result = smb_writepage_sync(dentry, page, 0, PAGE_SIZE);
smb_unlock_page(page);
free_page(page_address(page));
return result;
}
......
......@@ -13,11 +13,15 @@
#define TIMER_IRQ 0
/*
* 16 XT IRQ's, 8 potential APIC interrupt sources.
* Right now the APIC is only used for SMP, but this
* may change.
* 16 8259A IRQ's, 240 potential APIC interrupt sources.
* Right now the APIC is mostly only used for SMP.
* 256 vectors is an architectural limit. (we can have
* more than 256 devices theoretically, but they will
* have to use shared interrupts)
* Since vectors 0x00-0x1f are used/reserved for the CPU,
* the usable vector space is 0x20-0xff (224 vectors)
*/
#define NR_IRQS 64
#define NR_IRQS 224
static __inline__ int irq_cannonicalize(int irq)
{
......
#ifndef _I386_SEMAPHORE_HELPER_H
#define _I386_SEMAPHORE_HELPER_H
/*
* SMP- and interrupt-safe semaphores helper functions.
*
* (C) Copyright 1996 Linus Torvalds
* (C) Copyright 1999 Andrea Arcangeli
*/
/*
* These two _must_ execute atomically wrt each other.
*
* This is trivially done with load_locked/store_cond,
* but on the x86 we need an external synchronizer.
*/
static inline void wake_one_more(struct semaphore * sem)
{
unsigned long flags;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (atomic_read(&sem->count) <= 0)
sem->waking++;
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
}
static inline int waking_non_zero(struct semaphore *sem)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking > 0) {
sem->waking--;
ret = 1;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
/*
* waking_non_zero_interruptible:
* 1 got the lock
* 0 go to sleep
* -EINTR interrupted
*
* We must undo the sem->count down_interruptible() increment while we are
* protected by the spinlock in order to make atomic this atomic_inc() with the
* atomic_read() in wake_one_more(), otherwise we can race. -arca
*/
static inline int waking_non_zero_interruptible(struct semaphore *sem,
struct task_struct *tsk)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking > 0) {
sem->waking--;
ret = 1;
} else if (signal_pending(tsk)) {
atomic_inc(&sem->count);
ret = -EINTR;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
/*
* waking_non_zero_trylock:
* 1 failed to lock
* 0 got the lock
*
* We must undo the sem->count down_trylock() increment while we are
* protected by the spinlock in order to make atomic this atomic_inc() with the
* atomic_read() in wake_one_more(), otherwise we can race. -arca
*/
static inline int waking_non_zero_trylock(struct semaphore *sem)
{
unsigned long flags;
int ret = 1;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking <= 0)
atomic_inc(&sem->count);
else {
sem->waking--;
ret = 0;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
#endif
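The semaphore-helper.h functions above implement a "waking token" protocol: down()/down_trylock() decrement count optimistically in assembly, up() grants a wakeup token through wake_one_more() only when sleepers exist, and the slow paths consume that token under semaphore_wake_lock; the trylock variant hands its count back on failure instead of sleeping. Below is a single-threaded userspace model of that bookkeeping only, with no spinlock or atomics, so it illustrates the accounting rather than the SMP safety.

#include <stdio.h>

struct demo_sem { int count; int waking; };

static void demo_up(struct demo_sem *s)
{
    s->count++;
    if (s->count <= 0)            /* like wake_one_more(): somebody is sleeping */
        s->waking++;
}

static int demo_down_trylock(struct demo_sem *s)   /* 0 = acquired, 1 = busy */
{
    s->count--;                   /* the optimistic decrement done in asm */
    if (s->count >= 0)
        return 0;
    if (s->waking > 0) {          /* like waking_non_zero_trylock() */
        s->waking--;
        return 0;
    }
    s->count++;                   /* failed: give the count back */
    return 1;
}

int main(void)
{
    struct demo_sem s = { 1, 0 };

    printf("first  trylock: %s\n", demo_down_trylock(&s) ? "busy" : "acquired");
    printf("second trylock: %s\n", demo_down_trylock(&s) ? "busy" : "acquired");
    demo_up(&s);
    printf("after up      : %s\n", demo_down_trylock(&s) ? "busy" : "acquired");
    return 0;
}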
......@@ -12,6 +12,11 @@
* the original code and to make semaphore waits
* interruptible so that processes waiting on
* semaphores can be killed.
* Modified 1999-02-14 by Andrea Arcangeli, split the sched.c helper
* functions in asm/semaphore-helper.h while fixing a
* potential and subtle race discovered by Ulrich Schmid
* in down_interruptible(). Since I started to play here I
* also implemented the `trylock' semaphore operation.
*
* If you would like to see an analysis of this implementation, please
* ftp to gcom.com and download the file
......@@ -23,131 +28,29 @@
#include <asm/atomic.h>
#include <asm/spinlock.h>
/*
* Semaphores are recursive: we allow the holder process
* to recursively do down() operations on a semaphore that
* the process already owns. In order to do that, we need
* to keep a semaphore-local copy of the owner and the
* "depth of ownership".
*
* NOTE! Nasty memory ordering rules:
* - "owner" and "owner_count" may only be modified once you hold the
* lock.
* - "owner_count" must be written _after_ modifying owner, and
* must be read _before_ reading owner. There must be appropriate
* write and read barriers to enforce this.
*
* On an x86, writes are always ordered, so the only enformcement
* necessary is to make sure that the owner_depth is written after
* the owner value in program order.
*
* For read ordering guarantees, the semaphore wake_lock spinlock
* is already giving us ordering guarantees.
*
* Other (saner) architectures would use "wmb()" and "rmb()" to
* do this in a more obvious manner.
*/
struct semaphore {
atomic_t count;
unsigned long owner, owner_depth;
int waking;
struct wait_queue * wait;
};
/*
* Because we want the non-contention case to be
* fast, we save the stack pointer into the "owner"
* field, and to get the true task pointer we have
* to do the bit masking. That moves the masking
* operation into the slow path.
*/
#define semaphore_owner(sem) \
((struct task_struct *)((2*PAGE_MASK) & (sem)->owner))
#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, 0, 0, NULL })
#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, 1, 0, NULL })
#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, NULL })
#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, NULL })
asmlinkage void __down_failed(void /* special register calling convention */);
asmlinkage int __down_failed_interruptible(void /* params in registers */);
asmlinkage int __down_failed_trylock(void /* params in registers */);
asmlinkage void __up_wakeup(void /* special register calling convention */);
asmlinkage void __down(struct semaphore * sem);
asmlinkage int __down_interruptible(struct semaphore * sem);
asmlinkage int __down_trylock(struct semaphore * sem);
asmlinkage void __up(struct semaphore * sem);
extern spinlock_t semaphore_wake_lock;
#define sema_init(sem, val) atomic_set(&((sem)->count), (val))
/*
* These two _must_ execute atomically wrt each other.
*
* This is trivially done with load_locked/store_cond,
* but on the x86 we need an external synchronizer.
*/
static inline void wake_one_more(struct semaphore * sem)
{
unsigned long flags;
spin_lock_irqsave(&semaphore_wake_lock, flags);
sem->waking++;
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
}
/*
* NOTE NOTE NOTE!
*
* We read owner-count _before_ getting the semaphore. This
* is important, because the semaphore also acts as a memory
* ordering point between reading owner_depth and reading
* the owner.
*
* Why is this necessary? The "owner_depth" essentially protects
* us from using stale owner information - in the case that this
* process was the previous owner but somebody else is racing to
* aquire the semaphore, the only way we can see ourselves as an
* owner is with "owner_depth" of zero (so that we know to avoid
* the stale value).
*
* In the non-race case (where we really _are_ the owner), there
* is not going to be any question about what owner_depth is.
*
* In the race case, the race winner will not even get here, because
* it will have successfully gotten the semaphore with the locked
* decrement operation.
*
* Basically, we have two values, and we cannot guarantee that either
* is really up-to-date until we have aquired the semaphore. But we
* _can_ depend on a ordering between the two values, so we can use
* one of them to determine whether we can trust the other:
*
* Cases:
* - owner_depth == zero: ignore the semaphore owner, because it
* cannot possibly be us. Somebody else may be in the process
* of modifying it and the zero may be "stale", but it sure isn't
* going to say that "we" are the owner anyway, so who cares?
* - owner_depth is non-zero. That means that even if somebody
* else wrote the non-zero count value, the write ordering requriement
* means that they will have written themselves as the owner, so
* if we now see ourselves as an owner we can trust it to be true.
*/
static inline int waking_non_zero(struct semaphore *sem, struct task_struct *tsk)
{
unsigned long flags;
unsigned long owner_depth = sem->owner_depth;
int ret = 0;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking > 0 || (owner_depth && semaphore_owner(sem) == tsk)) {
sem->owner = (unsigned long) tsk;
sem->owner_depth++; /* Don't use the possibly stale value */
sem->waking--;
ret = 1;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
/*
* This is ugly, but we want the default case to fall through.
* "down_failed" is a special asm handler that calls the C
......@@ -161,9 +64,7 @@ extern inline void down(struct semaphore * sem)
"lock ; "
#endif
"decl 0(%0)\n\t"
"js 2f\n\t"
"movl %%esp,4(%0)\n"
"movl $1,8(%0)\n\t"
"js 2f\n"
"1:\n"
".section .text.lock,\"ax\"\n"
"2:\tpushl $1b\n\t"
......@@ -185,8 +86,6 @@ extern inline int down_interruptible(struct semaphore * sem)
#endif
"decl 0(%1)\n\t"
"js 2f\n\t"
"movl %%esp,4(%1)\n\t"
"movl $1,8(%1)\n\t"
"xorl %0,%0\n"
"1:\n"
".section .text.lock,\"ax\"\n"
......@@ -199,6 +98,28 @@ extern inline int down_interruptible(struct semaphore * sem)
return result;
}
extern inline int down_trylock(struct semaphore * sem)
{
int result;
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
#ifdef __SMP__
"lock ; "
#endif
"decl 0(%1)\n\t"
"js 2f\n\t"
"xorl %0,%0\n"
"1:\n"
".section .text.lock,\"ax\"\n"
"2:\tpushl $1b\n\t"
"jmp __down_failed_trylock\n"
".previous"
:"=a" (result)
:"c" (sem)
:"memory");
return result;
}
/*
* Note! This is subtle. We jump to wake people up only if
......@@ -210,7 +131,6 @@ extern inline void up(struct semaphore * sem)
{
__asm__ __volatile__(
"# atomic up operation\n\t"
"decl 8(%0)\n\t"
#ifdef __SMP__
"lock ; "
#endif
......
......@@ -453,8 +453,7 @@ struct sock {
#ifdef CONFIG_FILTER
/* Socket Filtering Instructions */
int filter;
struct sock_filter *filter_data;
struct sk_filter *filter;
#endif /* CONFIG_FILTER */
/* This is where all the private (optional) areas that don't
......@@ -790,11 +789,11 @@ extern void sklist_destroy_socket(struct sock **list, struct sock *sk);
* sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data.
*/
extern __inline__ int sk_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
extern __inline__ int sk_filter(struct sk_buff *skb, struct sk_filter *filter)
{
int pkt_len;
pkt_len = sk_run_filter(skb->data, skb->len, filter, flen);
pkt_len = sk_run_filter(skb, filter->insns, filter->len);
if(!pkt_len)
return 1; /* Toss Packet */
else
......@@ -802,6 +801,23 @@ extern __inline__ int sk_filter(struct sk_buff *skb, struct sock_filter *filter,
return 0;
}
extern __inline__ void sk_filter_release(struct sock *sk, struct sk_filter *fp)
{
unsigned int size = sk_filter_len(fp);
atomic_sub(size, &sk->omem_alloc);
if (atomic_dec_and_test(&fp->refcnt))
kfree_s(fp, size);
}
extern __inline__ void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
atomic_inc(&fp->refcnt);
atomic_add(sk_filter_len(fp), &sk->omem_alloc);
}
#endif /* CONFIG_FILTER */
/*
......@@ -837,11 +853,8 @@ extern __inline__ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return -ENOMEM;
#ifdef CONFIG_FILTER
if (sk->filter)
{
if (sk_filter(skb, sk->filter_data, sk->filter))
return -EPERM; /* Toss packet */
}
if (sk->filter && sk_filter(skb, sk->filter))
return -EPERM; /* Toss packet */
#endif /* CONFIG_FILTER */
skb_set_owner_r(skb, sk);
......
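sk_filter_charge()/sk_filter_release() above make the attached BPF program a reference-counted object charged to the socket's option memory, so a listening socket and the children cloned from it (see the tcp_create_openreq_child() hunk further down) can share one filter and free it exactly once. A hedged userspace model of that lifetime; demo_filter, malloc and free stand in for the sock_kmalloc/omem_alloc bookkeeping.

#include <stdio.h>
#include <stdlib.h>

struct demo_filter { int refcnt; int len; };

static struct demo_filter *filter_new(int len)
{
    struct demo_filter *fp = malloc(sizeof(*fp));
    if (!fp)
        abort();
    fp->refcnt = 1;               /* like atomic_set(&fp->refcnt, 1) on attach */
    fp->len = len;
    return fp;
}

static void filter_charge(struct demo_filter *fp)   /* a new socket shares it */
{
    fp->refcnt++;
}

static void filter_release(struct demo_filter *fp)  /* a socket drops it */
{
    if (--fp->refcnt == 0) {
        printf("last reference gone, freeing %d-instruction filter\n", fp->len);
        free(fp);
    }
}

int main(void)
{
    struct demo_filter *fp = filter_new(4);
    filter_charge(fp);     /* accept(): child socket inherits the filter */
    filter_release(fp);    /* parent socket closed */
    filter_release(fp);    /* child socket closed: filter finally freed */
    return 0;
}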
......@@ -912,7 +912,7 @@ static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *
* can generate.
*/
extern __inline__ void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
int offer_wscale, int wscale, __u32 tstamp)
int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
{
/* We always get an MSS option.
* The option bytes which will be seen in normal data
......@@ -936,7 +936,7 @@ extern __inline__ void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sa
*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
*ptr++ = htonl(tstamp); /* TSVAL */
*ptr++ = __constant_htonl(0); /* TSECR */
*ptr++ = htonl(ts_recent); /* TSECR */
} else if(sack)
*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
......
......@@ -61,7 +61,7 @@ extern int request_dma(unsigned int dmanr, char * deviceID);
extern void free_dma(unsigned int dmanr);
extern spinlock_t dma_spin_lock;
#ifdef MODVERSIONS
#ifdef CONFIG_MODVERSIONS
const struct module_symbol __export_Using_Versions
__attribute__((section("__ksymtab"))) = {
1 /* Version version */, "Using_Versions"
......@@ -322,6 +322,8 @@ EXPORT_SYMBOL(printk);
EXPORT_SYMBOL(sprintf);
EXPORT_SYMBOL(vsprintf);
EXPORT_SYMBOL(kdevname);
EXPORT_SYMBOL(bdevname);
EXPORT_SYMBOL(cdevname);
EXPORT_SYMBOL(simple_strtoul);
EXPORT_SYMBOL(system_utsname); /* UTS data */
EXPORT_SYMBOL(uts_sem); /* UTS semaphore */
......@@ -370,6 +372,7 @@ EXPORT_SYMBOL(is_bad_inode);
EXPORT_SYMBOL(event);
EXPORT_SYMBOL(__down);
EXPORT_SYMBOL(__down_interruptible);
EXPORT_SYMBOL(__down_trylock);
EXPORT_SYMBOL(__up);
EXPORT_SYMBOL(brw_page);
......
......@@ -36,6 +36,7 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/semaphore-helper.h>
#include <linux/timex.h>
......@@ -863,30 +864,28 @@ void __up(struct semaphore *sem)
struct task_struct *tsk = current; \
struct wait_queue wait = { tsk, NULL };
#define DOWN_HEAD(task_state) \
\
\
tsk->state = (task_state); \
add_wait_queue(&sem->wait, &wait); \
\
/* \
* Ok, we're set up. sem->count is known to be less than zero \
* so we must wait. \
* \
* We can let go the lock for purposes of waiting. \
* We re-acquire it after awaking so as to protect \
* all semaphore operations. \
* \
* If "up()" is called before we call waking_non_zero() then \
* we will catch it right away. If it is called later then \
* we will have to go through a wakeup cycle to catch it. \
* \
* Multiple waiters contend for the semaphore lock to see \
* who gets to gate through and who has to wait some more. \
*/ \
for (;;) { \
if (waking_non_zero(sem, tsk)) /* are we waking up? */ \
break; /* yes, exit loop */
#define DOWN_HEAD(task_state) \
\
\
tsk->state = (task_state); \
add_wait_queue(&sem->wait, &wait); \
\
/* \
* Ok, we're set up. sem->count is known to be less than zero \
* so we must wait. \
* \
* We can let go the lock for purposes of waiting. \
* We re-acquire it after awaking so as to protect \
* all semaphore operations. \
* \
* If "up()" is called before we call waking_non_zero() then \
* we will catch it right away. If it is called later then \
* we will have to go through a wakeup cycle to catch it. \
* \
* Multiple waiters contend for the semaphore lock to see \
* who gets to gate through and who has to wait some more. \
*/ \
for (;;) {
#define DOWN_TAIL(task_state) \
tsk->state = (task_state); \
......@@ -898,6 +897,8 @@ void __down(struct semaphore * sem)
{
DOWN_VAR
DOWN_HEAD(TASK_UNINTERRUPTIBLE)
if (waking_non_zero(sem))
break;
schedule();
DOWN_TAIL(TASK_UNINTERRUPTIBLE)
}
......@@ -907,10 +908,13 @@ int __down_interruptible(struct semaphore * sem)
DOWN_VAR
int ret = 0;
DOWN_HEAD(TASK_INTERRUPTIBLE)
if (signal_pending(tsk))
ret = waking_non_zero_interruptible(sem, tsk);
if (ret)
{
ret = -EINTR; /* interrupted */
atomic_inc(&sem->count); /* give up on down operation */
if (ret == 1)
/* ret != 0 only if we get interrupted -arca */
ret = 0;
break;
}
schedule();
......@@ -918,6 +922,11 @@ int __down_interruptible(struct semaphore * sem)
return ret;
}
int __down_trylock(struct semaphore * sem)
{
return waking_non_zero_trylock(sem);
}
#define SLEEP_ON_VAR \
unsigned long flags; \
struct wait_queue wait;
......
......@@ -11,6 +11,8 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Andi Kleen - Fix a few bad bugs and races.
*/
#include <linux/config.h>
......@@ -36,6 +38,22 @@
#include <asm/uaccess.h>
#include <linux/filter.h>
/* No hurry in this branch */
static u8 *load_pointer(struct sk_buff *skb, int k)
{
u8 *ptr = NULL;
if (k>=SKF_NET_OFF)
ptr = skb->nh.raw + k - SKF_NET_OFF;
else if (k>=SKF_LL_OFF)
ptr = skb->mac.raw + k - SKF_LL_OFF;
if (ptr >= skb->head && ptr < skb->tail)
return ptr;
return NULL;
}
/*
* Decode and apply filter instructions to the skb->data.
* Return length to keep, 0 for none. skb is the data we are
......@@ -43,15 +61,19 @@
* len is the number of filter blocks in the array.
*/
int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
unsigned char *data = skb->data;
/* len is UNSIGNED. Byte wide insns relies only on implicit
type casts to prevent reading arbitrary memory locations.
*/
unsigned int len = skb->len;
struct sock_filter *fentry; /* We walk down these */
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
int k;
int pc;
int *t;
/*
* Process array of filter instructions.
......@@ -60,53 +82,75 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
for(pc = 0; pc < flen; pc++)
{
fentry = &filter[pc];
if(fentry->code & BPF_X)
t=&X;
else
t=&fentry->k;
switch(fentry->code)
{
case BPF_ALU|BPF_ADD|BPF_X:
A += X;
continue;
case BPF_ALU|BPF_ADD|BPF_K:
A += *t;
A += fentry->k;
continue;
case BPF_ALU|BPF_SUB|BPF_X:
A -= X;
continue;
case BPF_ALU|BPF_SUB|BPF_K:
A -= *t;
A -= fentry->k;
continue;
case BPF_ALU|BPF_MUL|BPF_X:
A *= X;
continue;
case BPF_ALU|BPF_MUL|BPF_K:
A *= *t;
A *= X;
continue;
case BPF_ALU|BPF_DIV|BPF_X:
if(X == 0)
return (0);
A /= X;
continue;
case BPF_ALU|BPF_DIV|BPF_K:
if(*t == 0)
if(fentry->k == 0)
return (0);
A /= *t;
A /= fentry->k;
continue;
case BPF_ALU|BPF_AND|BPF_X:
A &= X;
continue;
case BPF_ALU|BPF_AND|BPF_K:
A &= *t;
A &= fentry->k;
continue;
case BPF_ALU|BPF_OR|BPF_X:
A |= X;
continue;
case BPF_ALU|BPF_OR|BPF_K:
A |= *t;
A |= fentry->k;
continue;
case BPF_ALU|BPF_LSH|BPF_X:
A <<= X;
continue;
case BPF_ALU|BPF_LSH|BPF_K:
A <<= *t;
A <<= fentry->k;
continue;
case BPF_ALU|BPF_RSH|BPF_X:
A >>= X;
continue;
case BPF_ALU|BPF_RSH|BPF_K:
A >>= *t;
A >>= fentry->k;
continue;
case BPF_ALU|BPF_NEG:
......@@ -148,26 +192,62 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
case BPF_JMP|BPF_JSET|BPF_X:
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_LD|BPF_W|BPF_ABS:
k = fentry->k;
if(k + sizeof(long) > len)
return (0);
A = ntohl(*(long*)&data[k]);
continue;
load_w:
if(k+sizeof(u32) <= len) {
A = ntohl(*(u32*)&data[k]);
continue;
}
if (k<0) {
u8 *ptr;
if (k>=SKF_AD_OFF)
break;
if ((ptr = load_pointer(skb, k)) != NULL) {
A = ntohl(*(u32*)ptr);
continue;
}
}
return 0;
case BPF_LD|BPF_H|BPF_ABS:
k = fentry->k;
if(k + sizeof(short) > len)
return (0);
A = ntohs(*(short*)&data[k]);
continue;
load_h:
if(k + sizeof(u16) <= len) {
A = ntohs(*(u16*)&data[k]);
continue;
}
if (k<0) {
u8 *ptr;
if (k>=SKF_AD_OFF)
break;
if ((ptr = load_pointer(skb, k)) != NULL) {
A = ntohs(*(u16*)ptr);
continue;
}
}
return 0;
case BPF_LD|BPF_B|BPF_ABS:
k = fentry->k;
if(k >= len)
return (0);
A = data[k];
continue;
load_b:
if(k < len) {
A = data[k];
continue;
}
if (k<0) {
u8 *ptr;
if (k>=SKF_AD_OFF)
break;
if ((ptr = load_pointer(skb, k)) != NULL) {
A = *ptr;
continue;
}
}
case BPF_LD|BPF_W|BPF_LEN:
A = len;
......@@ -177,35 +257,23 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
X = len;
continue;
case BPF_LD|BPF_W|BPF_IND:
case BPF_LD|BPF_W|BPF_IND:
k = X + fentry->k;
if(k + sizeof(u32) > len)
return (0);
A = ntohl(*(u32 *)&data[k]);
continue;
goto load_w;
case BPF_LD|BPF_H|BPF_IND:
k = X + fentry->k;
if(k + sizeof(u16) > len)
return (0);
A = ntohs(*(u16*)&data[k]);
continue;
goto load_h;
case BPF_LD|BPF_B|BPF_IND:
k = X + fentry->k;
if(k >= len)
return (0);
A = data[k];
continue;
goto load_b;
case BPF_LDX|BPF_B|BPF_MSH:
/*
* Hack for BPF to handle TOS etc
*/
k = fentry->k;
if(k >= len)
return (0);
X = (data[fentry->k] & 0xf) << 2;
X = (data[k] & 0xf) << 2;
continue;
case BPF_LD|BPF_IMM:
......@@ -216,7 +284,7 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
X = fentry->k;
continue;
case BPF_LD|BPF_MEM:
case BPF_LD|BPF_MEM:
A = mem[fentry->k];
continue;
......@@ -246,15 +314,29 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int
mem[fentry->k] = X;
continue;
default:
/* Invalid instruction counts as RET */
return (0);
}
/* Handle ancillary data, which are impossible
(or very difficult) to get parsing packet contents.
*/
switch (k-SKF_AD_OFF) {
case SKF_AD_PROTOCOL:
A = htons(skb->protocol);
continue;
case SKF_AD_PKTTYPE:
A = skb->pkt_type;
continue;
case SKF_AD_IFINDEX:
A = skb->dev->ifindex;
continue;
default:
return 0;
}
}
printk(KERN_ERR "Filter ruleset ran off the end.\n");
return (0);
}
......@@ -279,13 +361,17 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
ftest = &filter[pc];
if(BPF_CLASS(ftest->code) == BPF_JMP)
{
{
/*
* But they mustn't jump off the end.
*/
if(BPF_OP(ftest->code) == BPF_JA)
{
if(pc + ftest->k + 1>= (unsigned)flen)
/* Note, the large ftest->k might cause
loops. Compare this with conditional
jumps below, where offsets are limited. --ANK (981016)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return (-EINVAL);
}
else
......@@ -302,17 +388,18 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
* Check that memory operations use valid addresses.
*/
if(ftest->k <0 || ftest->k >= BPF_MEMWORDS)
if (ftest->k >= BPF_MEMWORDS)
{
/*
* But it might not be a memory operation...
*/
if (BPF_CLASS(ftest->code) == BPF_ST)
switch (ftest->code) {
case BPF_ST:
case BPF_STX:
case BPF_LD|BPF_MEM:
case BPF_LDX|BPF_MEM:
return -EINVAL;
if((BPF_CLASS(ftest->code) == BPF_LD) &&
(BPF_MODE(ftest->code) == BPF_MEM))
return (-EINVAL);
}
}
}
......@@ -332,35 +419,36 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sock_filter *fp, *old_filter;
int fsize = sizeof(struct sock_filter) * fprog->len;
struct sk_filter *fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
int err;
/* Make sure new filter is there and in the right amounts. */
if(fprog->filter == NULL || fprog->len == 0 || fsize > BPF_MAXINSNS)
if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
return (-EINVAL);
if((err = sk_chk_filter(fprog->filter, fprog->len))==0)
{
/* If existing filter, remove it first */
if(sk->filter)
{
old_filter = sk->filter_data;
kfree_s(old_filter, (sizeof(old_filter) * sk->filter));
sk->filter_data = NULL;
}
fp = (struct sk_filter *)sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
if(fp == NULL)
return (-ENOMEM);
fp = (struct sock_filter *)kmalloc(fsize, GFP_KERNEL);
if(fp == NULL)
return (-ENOMEM);
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
sock_kfree_s(sk, fp, fsize+sizeof(*fp));
return -EFAULT;
}
memset(fp,0,sizeof(*fp));
memcpy(fp, fprog->filter, fsize); /* Copy instructions */
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
sk->filter = fprog->len; /* Number of filter blocks */
sk->filter_data = fp; /* Filter instructions */
if ((err = sk_chk_filter(fp->insns, fp->len))==0) {
struct sk_filter *old_fp = sk->filter;
sk->filter = fp;
wmb();
fp = old_fp;
}
if (fp)
sk_filter_release(sk, fp);
return (err);
}
#endif /* CONFIG_FILTER */
......@@ -155,10 +155,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
int err;
struct linger ling;
int ret = 0;
#ifdef CONFIG_FILTER
struct sock_fprog fprog;
#endif
/*
* Options without arguments
......@@ -256,12 +252,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
case SO_PRIORITY:
if (val >= 0 && val <= 7)
{
if(val==7 && !capable(CAP_NET_ADMIN))
return -EPERM;
sk->priority = val;
else
return(-EINVAL);
}
break;
case SO_LINGER:
if(optlen<sizeof(ling))
return -EINVAL; /* 1003.1g */
......@@ -310,10 +307,12 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (optlen > IFNAMSIZ)
optlen = IFNAMSIZ;
if (copy_from_user(devname, optval, optlen))
return -EFAULT;
return -EFAULT;
/* Remove any cached route for this socket. */
lock_sock(sk);
dst_release(xchg(&sk->dst_cache, NULL));
release_sock(sk);
if (devname[0] == '\0') {
sk->bound_dev_if = 0;
......@@ -331,30 +330,32 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
#ifdef CONFIG_FILTER
case SO_ATTACH_FILTER:
if(optlen < sizeof(struct sock_fprog))
return -EINVAL;
ret = -EINVAL;
if (optlen == sizeof(struct sock_fprog)) {
struct sock_fprog fprog;
if(copy_from_user(&fprog, optval, sizeof(fprog)))
{
ret = -EFAULT;
break;
}
if (copy_from_user(&fprog, optval, sizeof(fprog)))
break;
ret = sk_attach_filter(&fprog, sk);
ret = sk_attach_filter(&fprog, sk);
}
break;
case SO_DETACH_FILTER:
if(sk->filter)
{
fprog.filter = sk->filter_data;
kfree_s(fprog.filter, (sizeof(fprog.filter) * sk->filter));
sk->filter_data = NULL;
sk->filter = 0;
if(sk->filter) {
struct sk_filter *filter;
filter = sk->filter;
sk->filter = NULL;
wmb();
if (filter)
sk_filter_release(sk, filter);
return 0;
}
else
return -EINVAL;
break;
return -ENOENT;
#endif
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
......@@ -504,6 +505,16 @@ void sk_free(struct sock *sk)
if (sk->destruct)
sk->destruct(sk);
#ifdef CONFIG_FILTER
if (sk->filter) {
sk_filter_release(sk, sk->filter);
sk->filter = NULL;
}
#endif
if (atomic_read(&sk->omem_alloc))
printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
kmem_cache_free(sk_cachep, sk);
}
......
......@@ -1323,6 +1323,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
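From user space, the sk_attach_filter() path reworked above (together with the SO_ATTACH_FILTER/SO_DETACH_FILTER handling in sock.c) is driven by setsockopt() with a struct sock_fprog. Here is a minimal example written against today's Linux headers rather than the 2.2-era ones; it attaches a classic BPF program that accepts only IPv4 frames on a packet socket, and needs root to create that socket.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>        /* htons() */
#include <linux/if_ether.h>    /* ETH_P_ALL, ETH_P_IP */
#include <linux/filter.h>      /* struct sock_filter, struct sock_fprog */

#ifndef SO_ATTACH_FILTER
#define SO_ATTACH_FILTER 26    /* value from asm/socket.h, in case the libc headers lack it */
#endif

int main(void)
{
    /* classic BPF: "ldh [12]; jeq #ETH_P_IP, accept, drop" */
    struct sock_filter insns[] = {
        { BPF_LD  | BPF_H   | BPF_ABS, 0, 0, 12       },  /* A = EtherType  */
        { BPF_JMP | BPF_JEQ | BPF_K,   0, 1, ETH_P_IP },  /* IPv4?          */
        { BPF_RET | BPF_K,             0, 0, 0xffff   },  /* accept packet  */
        { BPF_RET | BPF_K,             0, 0, 0        },  /* drop           */
    };
    struct sock_fprog prog = {
        .len    = sizeof(insns) / sizeof(insns[0]),
        .filter = insns,
    };

    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0) {
        perror("socket");
        return 1;
    }
    if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0) {
        perror("setsockopt(SO_ATTACH_FILTER)");
        close(fd);
        return 1;
    }
    printf("filter attached: only IPv4 frames will be delivered to this socket\n");
    close(fd);
    return 0;
}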
newsk->pair = NULL;
skb_queue_head_init(&newsk->back_log);
skb_queue_head_init(&newsk->error_queue);
#ifdef CONFIG_FILTER
if (newsk->filter)
sk_filter_charge(newsk, newsk->filter);
#endif
/* Now setup tcp_opt */
newtp = &(newsk->tp_pinfo.af_tcp);
......@@ -1553,12 +1557,10 @@ static inline struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb)
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_FILTER
if (sk->filter)
{
if (sk_filter(skb, sk->filter_data, sk->filter))
goto discard;
}
if (sk->filter && sk_filter(skb, sk->filter))
goto discard;
#endif /* CONFIG_FILTER */
/*
......
......@@ -30,6 +30,7 @@
* David S. Miller : Charge memory using the right skb
* during syn/ack processing.
* David S. Miller : Output engine completely rewritten.
* Andrea Arcangeli: SYNACK carry ts_recent in tsecr.
*
*/
......@@ -135,7 +136,8 @@ void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
(sysctl_flags & SYSCTL_FLAG_SACK),
(sysctl_flags & SYSCTL_FLAG_WSCALE),
tp->rcv_wscale,
TCP_SKB_CB(skb)->when);
TCP_SKB_CB(skb)->when,
tp->ts_recent);
} else {
tcp_build_and_update_options((__u32 *)(th + 1),
tp, TCP_SKB_CB(skb)->when);
......@@ -862,7 +864,8 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCP_SKB_CB(skb)->when = jiffies;
tcp_syn_build_options((__u32 *)(th + 1), req->mss, req->tstamp_ok,
req->sack_ok, req->wscale_ok, req->rcv_wscale,
TCP_SKB_CB(skb)->when);
TCP_SKB_CB(skb)->when,
req->ts_recent);
skb->csum = 0;
th->doff = (tcp_header_size >> 2);
......
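With the change above, tcp_syn_build_options() now echoes the peer's most recent timestamp (ts_recent) in the TSECR field of a SYN-ACK instead of sending zero. A small sketch of the three 32-bit words the timestamp branch writes; the option constants match the RFC 1323 values, while the tstamp/ts_recent numbers are invented.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>         /* htonl(), ntohl() */

#define TCPOPT_NOP        1
#define TCPOPT_TIMESTAMP  8
#define TCPOLEN_TIMESTAMP 10

int main(void)
{
    uint32_t opt[3];
    uint32_t tstamp = 123456, ts_recent = 654321;   /* illustrative values */

    opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
    opt[1] = htonl(tstamp);        /* TSVAL: our own timestamp clock      */
    opt[2] = htonl(ts_recent);     /* TSECR: peer's value, no longer zero */

    printf("option words: %08x %08x %08x\n",
           (unsigned)ntohl(opt[0]), (unsigned)ntohl(opt[1]), (unsigned)ntohl(opt[2]));
    return 0;
}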
......@@ -75,8 +75,7 @@ void net_timer (unsigned long data)
/* Only process if socket is not in use. */
if (atomic_read(&sk->sock_readers)) {
/* Try again later. */
sk->timer.expires = jiffies+HZ/20;
add_timer(&sk->timer);
mod_timer(&sk->timer, jiffies+HZ/20);
return;
}
......