Commit 8565fe85 authored by Linus Torvalds

v2.4.2.3 -> v2.4.2.4

  - Petr Vandrovec, Al Viro: dentry revalidation fixes
  - Stephen Tweedie / Manfred Spraul: kswapd and ptrace race
  - Neil Brown: nfsd/rpc/raid cleanups and fixes
parent b56566c6
......@@ -1501,15 +1501,6 @@ CONFIG_MD_RAID0
If unsure, say Y.
RAID-1/RAID-5 code (DANGEROUS)
CONFIG_RAID15_DANGEROUS
This new RAID1/RAID5 code has been freshly merged, and has not seen
enough testing yet. While there are no known bugs in it, it might
destroy your filesystems, eat your data and start World War III.
You have been warned.
If unsure, say N.
RAID-1 (mirroring) mode
CONFIG_MD_RAID1
A RAID-1 set consists of several disk drives which are exact copies
......
VERSION = 2
PATCHLEVEL = 4
SUBLEVEL = 3
EXTRAVERSION =-pre3
EXTRAVERSION =-pre4
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
......
......@@ -974,7 +974,7 @@ static void __init pci_fixup_vt8363(struct pci_dev *d)
pci_write_config_byte(d, 0x70, tmp & ~(1<<3));
}
pci_read_config_byte(d, 0x71, &tmp);
if(tmp & (1<<3)) {
if(!(tmp & (1<<3))) {
printk("PCI: Bursting cornercase bug worked around\n");
pci_write_config_byte(d, 0x71, tmp | (1<<3));
}
......
......@@ -3331,6 +3331,7 @@ COMPATIBLE_IOCTL(PPPIOCSMRRU)
COMPATIBLE_IOCTL(PPPIOCCONNECT)
COMPATIBLE_IOCTL(PPPIOCDISCONN)
COMPATIBLE_IOCTL(PPPIOCATTCHAN)
COMPATIBLE_IOCTL(PPPIOCGCHAN)
/* PPPOX */
COMPATIBLE_IOCTL(PPPOEIOCSFWD);
COMPATIBLE_IOCTL(PPPOEIOCDFWD);
......
......@@ -698,7 +698,9 @@ static void export_rdev (mdk_rdev_t * rdev)
md_list_del(&rdev->pending);
MD_INIT_LIST_HEAD(&rdev->pending);
}
#ifndef MODULE
md_autodetect_dev(rdev->dev);
#endif
rdev->dev = 0;
rdev->faulty = 0;
kfree(rdev);
......@@ -3528,6 +3530,7 @@ static void md_geninit (void)
int md__init md_init (void)
{
static char * name = "mdrecoveryd";
int minor;
printk (KERN_INFO "md driver %d.%d.%d MAX_MD_DEVS=%d, MD_SB_DISKS=%d\n",
MD_MAJOR_VERSION, MD_MINOR_VERSION,
......@@ -3539,9 +3542,14 @@ int md__init md_init (void)
return (-1);
}
devfs_handle = devfs_mk_dir (NULL, "md", NULL);
devfs_register_series (devfs_handle, "%u",MAX_MD_DEVS,DEVFS_FL_DEFAULT,
MAJOR_NR, 0, S_IFBLK | S_IRUSR | S_IWUSR,
&md_fops, NULL);
/* we don't use devfs_register_series because we want to fill md_hd_struct */
for (minor=0; minor < MAX_MD_DEVS; ++minor) {
char devname[128];
sprintf (devname, "%u", minor);
md_hd_struct[minor].de = devfs_register (devfs_handle,
devname, DEVFS_FL_DEFAULT, MAJOR_NR, minor,
S_IFBLK | S_IRUSR | S_IWUSR, &md_fops, NULL);
}
/* forward all md requests to md_make_request */
blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), md_make_request);
......@@ -3584,7 +3592,7 @@ struct {
* Searches all registered partitions for autorun RAID arrays
* at boot time.
*/
static int detected_devices[128] md__initdata;
static int detected_devices[128];
static int dev_cnt;
void md_autodetect_dev (kdev_t dev)
......
......@@ -327,7 +327,7 @@ int el3_probe(struct net_device *dev)
irq = idev->irq_resource[0].start;
if (el3_debug > 3)
printk ("ISAPnP reports %s at i/o 0x%x, irq %d\n",
el3_isapnp_adapters[i].name, ioaddr, irq);
(char*) el3_isapnp_adapters[i].driver_data, ioaddr, irq);
EL3WINDOW(0);
for (j = 0; j < 3; j++)
el3_isapnp_phys_addr[pnp_cards][j] =
......
......@@ -474,7 +474,7 @@ static int corkscrew_scan(struct net_device *dev)
irq = idev->irq_resource[0].start;
if(corkscrew_debug)
printk ("ISAPNP reports %s at i/o 0x%x, irq %d\n",
corkscrew_isapnp_adapters[i].name,ioaddr, irq);
(char*) corkscrew_isapnp_adapters[i].driver_data, ioaddr, irq);
if ((inw(ioaddr + 0x2002) & 0x1f0) != (ioaddr & 0x1f0))
continue;
......
......@@ -224,8 +224,7 @@ static inline struct dentry * __dget_locked(struct dentry *dentry)
atomic_inc(&dentry->d_count);
if (atomic_read(&dentry->d_count) == 1) {
dentry_stat.nr_unused--;
list_del(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_lru); /* make "list_empty()" work */
list_del_init(&dentry->d_lru);
}
return dentry;
}
......@@ -414,8 +413,7 @@ void shrink_dcache_sb(struct super_block * sb)
if (atomic_read(&dentry->d_count))
continue;
dentry_stat.nr_unused--;
list_del(tmp);
INIT_LIST_HEAD(tmp);
list_del_init(tmp);
prune_one_dentry(dentry);
goto repeat;
}
......@@ -657,6 +655,7 @@ struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
void d_instantiate(struct dentry *entry, struct inode * inode)
{
if (!list_empty(&entry->d_alias)) BUG();
spin_lock(&dcache_lock);
if (inode)
list_add(&entry->d_alias, &inode->i_dentry);
......@@ -745,58 +744,48 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
/**
* d_validate - verify dentry provided from insecure source
* @dentry: The dentry alleged to be valid
* @dparent: The parent dentry
* @dentry: The dentry alleged to be valid child of @dparent
* @dparent: The parent dentry (known to be valid)
* @hash: Hash of the dentry
* @len: Length of the name
*
* An insecure source has sent us a dentry, here we verify it and dget() it.
* This is used by ncpfs in its readdir implementation.
* Zero is returned if the dentry is invalid.
*
* NOTE: This function does _not_ dereference the pointers before we have
* validated them. We can test the pointer values, but we
* must not actually use them until we have found a valid
* copy of the pointer in kernel space..
*/
int d_validate(struct dentry *dentry, struct dentry *dparent,
unsigned int hash, unsigned int len)
int d_validate(struct dentry *dentry, struct dentry *dparent)
{
unsigned long dent_addr = (unsigned long) dentry;
unsigned long min_addr = PAGE_OFFSET;
unsigned long align_mask = 0x0F;
struct list_head *base, *lhp;
int valid = 1;
spin_lock(&dcache_lock);
if (dentry != dparent) {
base = d_hash(dparent, hash);
lhp = base;
while ((lhp = lhp->next) != base) {
if (dentry == list_entry(lhp, struct dentry, d_hash)) {
__dget_locked(dentry);
goto out;
}
}
} else {
/*
* Special case: local mount points don't live in
* the hashes, so we search the super blocks.
*/
struct super_block *sb = sb_entry(super_blocks.next);
if (dent_addr < min_addr)
goto out;
if (dent_addr > (unsigned long)high_memory - sizeof(struct dentry))
goto out;
if (dent_addr & align_mask)
goto out;
if ((!kern_addr_valid(dent_addr)) || (!kern_addr_valid(dent_addr -1 +
sizeof(struct dentry))))
goto out;
for (; sb != sb_entry(&super_blocks);
sb = sb_entry(sb->s_list.next)) {
if (!sb->s_dev)
continue;
if (sb->s_root == dentry) {
__dget_locked(dentry);
goto out;
}
if (dentry->d_parent != dparent)
goto out;
spin_lock(&dcache_lock);
lhp = base = d_hash(dparent, dentry->d_name.hash);
while ((lhp = lhp->next) != base) {
if (dentry == list_entry(lhp, struct dentry, d_hash)) {
__dget_locked(dentry);
spin_unlock(&dcache_lock);
return 1;
}
}
valid = 0;
out:
spin_unlock(&dcache_lock);
return valid;
out:
return 0;
}
/*
......@@ -849,6 +838,7 @@ void d_delete(struct dentry * dentry)
void d_rehash(struct dentry * entry)
{
struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
if (!list_empty(&entry->d_hash)) BUG();
spin_lock(&dcache_lock);
list_add(&entry->d_hash, list);
spin_unlock(&dcache_lock);
......@@ -923,8 +913,7 @@ void d_move(struct dentry * dentry, struct dentry * target)
list_add(&dentry->d_hash, &target->d_hash);
/* Unhash the target: dput() will then get rid of it */
list_del(&target->d_hash);
INIT_LIST_HEAD(&target->d_hash);
list_del_init(&target->d_hash);
list_del(&dentry->d_child);
list_del(&target->d_child);
......
......@@ -326,56 +326,15 @@ ncp_lookup_validate(struct dentry * dentry, int flags)
return res;
}
/* most parts from nfsd_d_validate() */
static int
ncp_d_validate(struct dentry *dentry)
{
unsigned long dent_addr = (unsigned long) dentry;
unsigned long min_addr = PAGE_OFFSET;
unsigned long align_mask = 0x0F;
unsigned int len;
int valid = 0;
if (dent_addr < min_addr)
goto bad_addr;
if (dent_addr > (unsigned long)high_memory - sizeof(struct dentry))
goto bad_addr;
if ((dent_addr & ~align_mask) != dent_addr)
goto bad_align;
if ((!kern_addr_valid(dent_addr)) || (!kern_addr_valid(dent_addr -1 +
sizeof(struct dentry))))
goto bad_addr;
/*
* Looks safe enough to dereference ...
*/
len = dentry->d_name.len;
if (len > NCP_MAXPATHLEN)
goto out;
/*
* Note: d_validate doesn't dereference the parent pointer ...
* just combines it with the name hash to find the hash chain.
*/
valid = d_validate(dentry, dentry->d_parent, dentry->d_name.hash, len);
out:
return valid;
bad_addr:
PRINTK("ncp_d_validate: invalid address %lx\n", dent_addr);
goto out;
bad_align:
PRINTK("ncp_d_validate: unaligned address %lx\n", dent_addr);
goto out;
}
static struct dentry *
ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
{
struct dentry *dent = dentry;
struct list_head *next;
if (ncp_d_validate(dent)) {
if (dent->d_parent == parent &&
(unsigned long)dent->d_fsdata == fpos) {
if (d_validate(dent, parent)) {
if (dent->d_name.len <= NCP_MAXPATHLEN &&
(unsigned long)dent->d_fsdata == fpos) {
if (!dent->d_inode) {
dput(dent);
dent = NULL;
......@@ -580,6 +539,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
struct ncp_cache_control ctl = *ctrl;
struct qstr qname;
int valid = 0;
int hashed = 0;
ino_t ino = 0;
__u8 __name[256];
......@@ -602,9 +562,11 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
newdent = d_alloc(dentry, &qname);
if (!newdent)
goto end_advance;
} else
} else {
hashed = 1;
memcpy((char *) newdent->d_name.name, qname.name,
newdent->d_name.len);
}
if (!newdent->d_inode) {
entry->opened = 0;
......@@ -612,7 +574,9 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
newino = ncp_iget(inode->i_sb, entry);
if (newino) {
newdent->d_op = &ncp_dentry_operations;
d_add(newdent, newino);
d_instantiate(newdent, newino);
if (!hashed)
d_rehash(newdent);
}
} else
ncp_update_inode2(newdent->d_inode, entry);
......
......@@ -196,7 +196,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
struct iattr *attr = &argp->attrs;
struct inode *inode;
struct dentry *dchild;
int nfserr, type, mode, rdonly = 0;
int nfserr, type, mode;
dev_t rdev = NODEV;
dprintk("nfsd: CREATE %s %s\n",
......@@ -207,13 +207,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
if (nfserr)
goto done; /* must fh_put dirfhp even on error */
/* Check for MAY_WRITE separately. */
nfserr = nfsd_permission(dirfhp->fh_export, dirfhp->fh_dentry,
MAY_WRITE);
if (nfserr == nfserr_rofs) {
rdonly = 1; /* Non-fatal error for echo > /dev/null */
} else if (nfserr)
goto done;
/* Check for MAY_WRITE in nfsd_create if necessary */
nfserr = nfserr_acces;
if (!argp->len)
......@@ -257,10 +251,25 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
* else assume a file */
if (inode) {
type = inode->i_mode & S_IFMT;
if (type == S_IFCHR || type == S_IFBLK) {
switch(type) {
case S_IFCHR:
case S_IFBLK:
/* reserve rdev for later checking */
attr->ia_size = inode->i_rdev;
attr->ia_valid |= ATTR_SIZE;
/* FALLTHROUGH */
case S_IFIFO:
/* this is probably a permission check..
* at least IRIX implements perm checking on
* echo thing > device-special-file-or-pipe
* by doing a CREATE with type==0
*/
nfserr = nfsd_permission(newfhp->fh_export,
newfhp->fh_dentry,
MAY_WRITE);
if (nfserr && nfserr != nfserr_rofs)
goto out_unlock;
}
} else
type = S_IFREG;
......@@ -273,11 +282,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
mode = 0; /* ??? */
}
/* This is for "echo > /dev/null" a la SunOS. Argh. */
nfserr = nfserr_rofs;
if (rdonly && (!inode || type == S_IFREG))
goto out_unlock;
attr->ia_valid |= ATTR_MODE;
attr->ia_mode = mode;
......
......@@ -737,27 +737,24 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
* nice and simple solution (IMHO), and it seems to
* work:-)
*/
if (EX_WGATHER(exp) && (atomic_read(&inode->i_writecount) > 1
|| (last_ino == inode->i_ino && last_dev == inode->i_dev))) {
#if 0
interruptible_sleep_on_timeout(&inode->i_wait, 10 * HZ / 1000);
#else
dprintk("nfsd: write defer %d\n", current->pid);
/* FIXME: Olaf commented this out [gam3] */
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout((HZ+99)/100);
current->state = TASK_RUNNING;
dprintk("nfsd: write resume %d\n", current->pid);
#endif
}
if (EX_WGATHER(exp)) {
if (atomic_read(&inode->i_writecount) > 1
|| (last_ino == inode->i_ino && last_dev == inode->i_dev)) {
dprintk("nfsd: write defer %d\n", current->pid);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout((HZ+99)/100);
current->state = TASK_RUNNING;
dprintk("nfsd: write resume %d\n", current->pid);
}
if (inode->i_state & I_DIRTY) {
dprintk("nfsd: write sync %d\n", current->pid);
nfsd_sync(&file);
}
if (inode->i_state & I_DIRTY) {
dprintk("nfsd: write sync %d\n", current->pid);
nfsd_sync(&file);
}
#if 0
wake_up(&inode->i_wait);
wake_up(&inode->i_wait);
#endif
}
last_ino = inode->i_ino;
last_dev = inode->i_dev;
}
......
......@@ -71,47 +71,6 @@ smb_invalidate_dircache_entries(struct dentry *parent)
spin_unlock(&dcache_lock);
}
static int
smb_d_validate(struct dentry *dentry)
{
unsigned long dent_addr = (unsigned long) dentry;
unsigned long min_addr = PAGE_OFFSET;
unsigned long align_mask = 0x0F;
unsigned int len;
int valid = 0;
if (dent_addr < min_addr)
goto bad_addr;
if (dent_addr > (unsigned long)high_memory - sizeof(struct dentry))
goto bad_addr;
if ((dent_addr & ~align_mask) != dent_addr)
goto bad_align;
if ((!kern_addr_valid(dent_addr)) || (!kern_addr_valid(dent_addr -1 +
sizeof(struct dentry))))
goto bad_addr;
/*
* Looks safe enough to dereference ...
*/
len = dentry->d_name.len;
if (len > SMB_MAXPATHLEN)
goto out;
/*
* Note: d_validate doesn't dereference the parent pointer ...
* just combines it with the name hash to find the hash chain.
*/
valid = d_validate(dentry, dentry->d_parent, dentry->d_name.hash, len);
out:
return valid;
bad_addr:
printk(KERN_ERR "smb_d_validate: invalid address %lx\n", dent_addr);
goto out;
bad_align:
printk(KERN_ERR "smb_d_validate: unaligned address %lx\n", dent_addr);
goto out;
}
/*
* dget, but require that fpos and parent matches what the dentry contains.
* dentry is not known to be a valid pointer at entry.
......@@ -122,8 +81,8 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
struct dentry *dent = dentry;
struct list_head *next;
if (smb_d_validate(dent)) {
if (dent->d_parent == parent &&
if (d_validate(dent, parent)) {
if (dent->d_name.len <= SMB_MAXPATHLEN &&
(unsigned long)dent->d_fsdata == fpos) {
if (!dent->d_inode) {
dput(dent);
......
......@@ -180,7 +180,7 @@ extern int * max_segments[MAX_BLKDEV];
extern atomic_t queued_sectors;
#define MAX_SEGMENTS 128
#define MAX_SECTORS (MAX_SEGMENTS*8)
#define MAX_SECTORS 255
#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
......
......@@ -217,7 +217,7 @@ extern void d_move(struct dentry *, struct dentry *);
extern struct dentry * d_lookup(struct dentry *, struct qstr *);
/* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *, unsigned int, unsigned int);
extern int d_validate(struct dentry *, struct dentry *);
extern char * __d_path(struct dentry *, struct vfsmount *, struct dentry *,
struct vfsmount *, char *, int);
......
......@@ -21,6 +21,14 @@ typedef struct free_area_struct {
struct pglist_data;
/*
* On machines where it is needed (eg PCs) we divide physical memory
* into multiple physical zones. On a PC we have 3 zones:
*
* ZONE_DMA < 16 MB ISA DMA capable memory
* ZONE_NORMAL 16-896 MB direct mapped by the kernel
* ZONE_HIGHMEM > 896 MB only page cache and user processes
*/
typedef struct zone_struct {
/*
* Commonly accessed fields:
......@@ -75,6 +83,17 @@ typedef struct zonelist_struct {
#define NR_GFPINDEX 0x100
/*
* The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
* (mostly NUMA machines?) to denote a higher-level memory zone than the
* zone_struct denotes.
*
* On NUMA machines, each NUMA node would have a pg_data_t to describe
* its memory layout.
*
* XXX: we need to move the global memory statistics (active_list, ...)
* into the pg_data_t to properly support NUMA.
*/
struct bootmem_data;
typedef struct pglist_data {
zone_t node_zones[MAX_NR_ZONES];
......
......@@ -28,6 +28,7 @@ static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, u
struct page *page;
repeat:
spin_lock(&mm->page_table_lock);
pgdir = pgd_offset(vma->vm_mm, addr);
if (pgd_none(*pgdir))
goto fault_in_page;
......@@ -47,9 +48,13 @@ static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, u
/* ZERO_PAGE is special: reads from it are ok even though it's marked reserved */
if (page != ZERO_PAGE(addr) || write) {
if ((!VALID_PAGE(page)) || PageReserved(page))
if ((!VALID_PAGE(page)) || PageReserved(page)) {
spin_unlock(&mm->page_table_lock);
return 0;
}
}
get_page(page);
spin_unlock(&mm->page_table_lock);
flush_cache_page(vma, addr);
if (write) {
......@@ -64,19 +69,23 @@ static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, u
flush_page_to_ram(page);
kunmap(page);
}
put_page(page);
return len;
fault_in_page:
spin_unlock(&mm->page_table_lock);
/* -1: out of memory. 0 - unmapped page */
if (handle_mm_fault(mm, vma, addr, write) > 0)
goto repeat;
return 0;
bad_pgd:
spin_unlock(&mm->page_table_lock);
pgd_ERROR(*pgdir);
return 0;
bad_pmd:
spin_unlock(&mm->page_table_lock);
pmd_ERROR(*pgmiddle);
return 0;
}
......
......@@ -212,16 +212,22 @@ static inline void
svc_sock_release(struct svc_rqst *rqstp)
{
	/*
	 * Detach the socket from the request and drop this request's
	 * reference on it.  sk_inuse is decremented under serv->sv_lock;
	 * if the count reaches zero and the socket has been marked dead,
	 * the lock is dropped first and the socket is then released and
	 * its svc_sock freed.
	 */
	struct svc_sock *svsk = rqstp->rq_sock;
	struct svc_serv *serv;

	/* Nothing to release if no socket is attached to the request. */
	if (!svsk)
		return;

	/*
	 * Only look at svsk->sk_server once svsk is known to be non-NULL;
	 * reading it in the declaration would defeat the check above.
	 */
	serv = svsk->sk_server;

	svc_release_skb(rqstp);
	rqstp->rq_sock = NULL;

	spin_lock_bh(&serv->sv_lock);
	if (!--(svsk->sk_inuse) && svsk->sk_dead) {
		spin_unlock_bh(&serv->sv_lock);
		dprintk("svc: releasing dead socket\n");
		sock_release(svsk->sk_sock);
		kfree(svsk);
	}
	else
		spin_unlock_bh(&serv->sv_lock);
}
/*
......@@ -1034,14 +1040,15 @@ svc_delete_socket(struct svc_sock *svsk)
if (svsk->sk_qued)
rpc_remove_list(&serv->sv_sockets, svsk);
spin_unlock_bh(&serv->sv_lock);
svsk->sk_dead = 1;
if (!svsk->sk_inuse) {
spin_unlock_bh(&serv->sv_lock);
sock_release(svsk->sk_sock);
kfree(svsk);
} else {
spin_unlock_bh(&serv->sv_lock);
printk(KERN_NOTICE "svc: server socket destroy delayed\n");
/* svsk->sk_server = NULL; */
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment