Commit d88ebc0a authored by Linus Torvalds

Merge master.kernel.org:/home/hch/BK/xfs/linux-2.5
into home.transmeta.com:/home/torvalds/v2.5/linux
parents 3f614a3d 3f27dd28
......@@ -50,7 +50,7 @@
#include "proto.h"
#include "irq_impl.h"
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
extern unsigned long wall_jiffies; /* kernel/timer.c */
......
......@@ -32,7 +32,7 @@
#include <asm/irq.h>
#include <asm/leds.h>
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
extern unsigned long wall_jiffies;
......
......@@ -45,7 +45,7 @@
#include <asm/svinto.h>
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
static int have_rtc; /* used to remember if we have an RTC or not */
......
......@@ -75,6 +75,11 @@ config X86_SUMMIT
If you don't have one of these computers, you should say N here.
config ACPI_SRAT
bool
default y
depends on NUMA && X86_SUMMIT
config X86_BIGSMP
bool "Support for other sub-arch SMP systems with more than 8 CPUs"
help
......@@ -483,7 +488,7 @@ config NR_CPUS
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation Support"
depends on X86_NUMAQ
depends on (HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT && ACPI && !ACPI_HT_ONLY)))
config DISCONTIGMEM
bool
......@@ -752,6 +757,13 @@ config HAVE_DEC_LOCK
depends on (SMP || PREEMPT) && X86_CMPXCHG
default y
# turning this on wastes a bunch of space.
# Summit needs it only when NUMA is on
config BOOT_IOREMAP
bool
depends on (X86_SUMMIT && NUMA)
default y
endmenu
......
......@@ -28,6 +28,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_EDD) += edd.o
obj-$(CONFIG_MODULES) += module.o
obj-y += sysenter.o
obj-$(CONFIG_ACPI_SRAT) += srat.o
EXTRA_AFLAGS := -traditional
......
......@@ -223,7 +223,7 @@ static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
extern unsigned long irq_affinity [NR_IRQS];
int __cacheline_aligned pending_irq_balance_apicid [NR_IRQS];
static int irqbalance_disabled __initdata = 0;
static int irqbalance_disabled = NO_BALANCE_IRQ;
static int physical_balance = 0;
struct irq_cpu_info {
......@@ -492,7 +492,7 @@ static inline void balance_irq (int cpu, int irq)
unsigned long allowed_mask;
unsigned int new_cpu;
if (no_balance_irq)
if (irqbalance_disabled)
return;
allowed_mask = cpu_online_map & irq_affinity[irq];
......
......@@ -66,7 +66,7 @@ int pit_latch_buggy; /* extern */
#include "do_timer.h"
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
unsigned long cpu_khz; /* Detected as we calibrate the TSC */
......
......@@ -1230,9 +1230,10 @@ flush_tlb_all_function(void* info)
void
flush_tlb_all(void)
{
preempt_disable();
smp_call_function (flush_tlb_all_function, 0, 1, 1);
do_flush_tlb_all_local();
preempt_enable();
}
/* used to set up the trampoline for other CPUs when the memory manager
......
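The flush_tlb_all() change above closes a migration race: smp_call_function() runs the handler only on the *other* CPUs, so the local CPU must do its own flush afterwards, and the task must not be preempted and migrated between the cross-call and the local call. A minimal sketch of the general pattern (run_on_all_cpus() is a hypothetical name for illustration; later kernels grew on_each_cpu() for exactly this):

	/* Hypothetical illustration of the cross-call-plus-local-work pattern. */
	static void run_on_all_cpus(void (*func)(void *), void *info)
	{
		preempt_disable();			/* pin the task to this CPU */
		smp_call_function(func, info, 1, 1);	/* all *other* CPUs, wait */
		func(info);				/* ...and the local CPU */
		preempt_enable();
	}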
......@@ -2,8 +2,9 @@
# Makefile for the linux i386-specific parts of the memory manager.
#
obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o
obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o
obj-$(CONFIG_DISCONTIGMEM) += discontig.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
/*
* arch/i386/mm/boot_ioremap.c
*
* Re-map functions for early boot-time before paging_init() when the
* boot-time pagetables are still in use
*
* Written by Dave Hansen <haveblue@us.ibm.com>
*/
/*
* We need to use the 2-level pagetable functions, but CONFIG_X86_PAE
* keeps that from happening. If anyone has a better way, I'm listening.
*
* boot_pte_t is defined only if this all works correctly
*/
#include <linux/config.h>
#undef CONFIG_X86_PAE
#include <asm/page.h>
#include <asm/pgtable.h>
#include <linux/init.h>
#include <linux/stddef.h>
/*
* I'm cheating here. It is known that the two boot PTE pages are
* allocated next to each other. I'm pretending that they're just
* one big array.
*/
#define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
#define boot_pte_index(address) \
(((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1))
static inline boot_pte_t* boot_vaddr_to_pte(void *address)
{
boot_pte_t* boot_pg = (boot_pte_t*)pg0;
return &boot_pg[boot_pte_index((unsigned long)address)];
}
/*
* This is only for a caller who is clever enough to page-align
* phys_addr and virtual_source, and who also has a preference
* about which virtual address to steal ptes from
*/
static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages,
void* virtual_source)
{
boot_pte_t* pte;
int i;
pte = boot_vaddr_to_pte(virtual_source);
for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) {
set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL));
}
}
/* the virtual space we're going to remap comes from this array */
#define BOOT_IOREMAP_PAGES 4
#define BOOT_IOREMAP_SIZE (BOOT_IOREMAP_PAGES*PAGE_SIZE)
__initdata char boot_ioremap_space[BOOT_IOREMAP_SIZE]
__attribute__ ((aligned (PAGE_SIZE)));
/*
* This only applies to things which need to ioremap before paging_init()
* bt_ioremap() and plain ioremap() are both useless at this point.
*
* When used, we're still using the boot-time pagetables, which only
* have 2 PTE pages mapping the first 8MB
*
* There is no unmap. The boot-time PTE pages aren't used after boot.
* If you really want the space back, just remap it yourself.
* boot_ioremap(&ioremap_space-PAGE_OFFSET, BOOT_IOREMAP_SIZE)
*/
__init void* boot_ioremap(unsigned long phys_addr, unsigned long size)
{
unsigned long last_addr, offset;
unsigned int nrpages;
last_addr = phys_addr + size - 1;
/* page align the requested address */
offset = phys_addr & ~PAGE_MASK;
phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr) - phys_addr;
nrpages = size >> PAGE_SHIFT;
if (nrpages > BOOT_IOREMAP_PAGES)
return NULL;
__boot_ioremap(phys_addr, nrpages, boot_ioremap_space);
return &boot_ioremap_space[offset];
}
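As the comments above say, boot_ioremap() only works before paging_init(), steals PTEs from the statically reserved boot_ioremap_space window, and has no unmap. A hedged usage sketch (the caller, table type, and physical address are invented for illustration):

	/* Hypothetical early-boot caller mapping two pages of a firmware table. */
	static void __init example_early_probe(void)
	{
		struct acpi_table_header *hdr;

		hdr = boot_ioremap(0x9fe00000UL, 2 * PAGE_SIZE);  /* made-up address */
		if (!hdr)
			return;	/* request was larger than BOOT_IOREMAP_PAGES */
		/* parse the table; nothing to unmap afterwards */
	}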
......@@ -284,6 +284,7 @@ void __init zone_sizes_init(void)
for (nid = 0; nid < numnodes; nid++) {
unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
unsigned long *zholes_size;
unsigned int max_dma;
unsigned long low = max_low_pfn;
......@@ -307,6 +308,7 @@ void __init zone_sizes_init(void)
#endif
}
}
zholes_size = get_zholes_size(nid);
/*
* We let the lmem_map for node 0 be allocated from the
* normal bootmem allocator, but other nodes come from the
......@@ -315,10 +317,10 @@ void __init zone_sizes_init(void)
if (nid)
free_area_init_node(nid, NODE_DATA(nid),
node_remap_start_vaddr[nid], zones_size,
start, 0);
start, zholes_size);
else
free_area_init_node(nid, NODE_DATA(nid), 0,
zones_size, start, 0);
zones_size, start, zholes_size);
}
return;
}
......
......@@ -29,6 +29,8 @@ static long htlbzone_pages;
static LIST_HEAD(htlbpage_freelist);
static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
void free_huge_page(struct page *page);
static struct page *alloc_hugetlb_page(void)
{
int i;
......@@ -45,7 +47,7 @@ static struct page *alloc_hugetlb_page(void)
htlbpagemem--;
spin_unlock(&htlbpage_lock);
set_page_count(page, 1);
page->lru.prev = (void *)huge_page_release;
page->lru.prev = (void *)free_huge_page;
for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
clear_highpage(&page[i]);
return page;
......
......@@ -205,6 +205,7 @@ void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
iounmap(p);
p = NULL;
}
global_flush_tlb();
}
return p;
......@@ -226,6 +227,7 @@ void iounmap(void *addr)
change_page_attr(virt_to_page(__va(p->phys_addr)),
p->size >> PAGE_SHIFT,
PAGE_KERNEL);
global_flush_tlb();
}
kfree(p);
}
......
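Both ioremap hunks (here and the x86-64 copy further down) restore the same contract: change_page_attr() only edits the kernel page tables, and the new attributes are not reliably visible until global_flush_tlb() runs. The pairing, in miniature (PAGE_KERNEL_NOCACHE chosen only as an example attribute):

	change_page_attr(page, numpages, PAGE_KERNEL_NOCACHE);
	global_flush_tlb();	/* required before relying on the new mapping type */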
......@@ -27,7 +27,7 @@
extern unsigned long wall_jiffies;
extern unsigned long last_time_offset;
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
#ifdef CONFIG_IA64_DEBUG_IRQ
......
......@@ -26,6 +26,8 @@ static long htlbzone_pages;
static LIST_HEAD(htlbpage_freelist);
static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
void free_huge_page(struct page *page);
static struct page *alloc_hugetlb_page(void)
{
int i;
......@@ -42,6 +44,7 @@ static struct page *alloc_hugetlb_page(void)
htlbpagemem--;
spin_unlock(&htlbpage_lock);
set_page_count(page, 1);
page->lru.prev = (void *)free_huge_page;
for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
clear_highpage(&page[i]);
return page;
......
......@@ -26,7 +26,7 @@
#include <linux/timex.h>
#include <linux/profile.h>
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
static inline int set_rtc_mmss(unsigned long nowtime)
{
......
......@@ -26,7 +26,7 @@
#define TICK_SIZE (tick_nsec / 1000)
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
static inline int set_rtc_mmss(unsigned long nowtime)
{
......
......@@ -32,7 +32,7 @@
#define USECS_PER_JIFFY (1000000/HZ)
#define USECS_PER_JIFFY_FRAC ((1000000ULL << 32) / HZ & 0xffffffff)
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
/*
* forward reference
......
......@@ -32,7 +32,7 @@
#include <linux/timex.h>
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
/* xtime and wall_jiffies keep wall-clock time */
extern unsigned long wall_jiffies;
......
......@@ -68,7 +68,7 @@
#include <asm/time.h>
/* XXX false sharing with below? */
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
unsigned long disarm_decr[NR_CPUS];
......
......@@ -65,7 +65,7 @@
void smp_local_timer_interrupt(struct pt_regs *);
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
/* keep track of when we need to update the rtc */
time_t last_rtc_update;
......
......@@ -46,7 +46,7 @@
#define TICK_SIZE tick
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
static ext_int_info_t ext_int_info_timer;
static uint64_t xtime_cc;
......
......@@ -45,7 +45,7 @@
#define TICK_SIZE tick
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
static ext_int_info_t ext_int_info_timer;
static uint64_t xtime_cc;
......
......@@ -70,7 +70,7 @@
#endif /* CONFIG_CPU_SUBTYPE_ST40STB1 */
#endif /* __sh3__ or __SH4__ */
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
extern unsigned long wall_jiffies;
#define TICK_SIZE tick
......
......@@ -45,7 +45,7 @@
extern unsigned long wall_jiffies;
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
enum sparc_clock_type sp_clock_typ;
......
......@@ -47,7 +47,7 @@ unsigned long ds1287_regs = 0UL;
extern unsigned long wall_jiffies;
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
static unsigned long mstk48t08_regs = 0UL;
static unsigned long mstk48t59_regs = 0UL;
......
......@@ -25,6 +25,7 @@ spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
extern long htlbpagemem;
static void zap_hugetlb_resources(struct vm_area_struct *);
void free_huge_page(struct page *page);
#define MAX_ID 32
struct htlbpagekey {
......@@ -64,6 +65,7 @@ static struct page *alloc_hugetlb_page(void)
spin_unlock(&htlbpage_lock);
set_page_count(page, 1);
page->lru.prev = (void *)free_huge_page;
memset(page_address(page), 0, HPAGE_SIZE);
return page;
......
......@@ -25,7 +25,7 @@
#include "mach.h"
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
#define TICK_SIZE (tick_nsec / 1000)
......
......@@ -437,6 +437,7 @@ static __init int init_k8_gatt(agp_kern_info *info)
}
flush_gart();
global_flush_tlb();
printk("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10);
return 0;
......
......@@ -30,7 +30,7 @@
#include <asm/apic.h>
#endif
u64 jiffies_64;
u64 jiffies_64 = INITIAL_JIFFIES;
extern int using_apic_timer;
......
......@@ -205,6 +205,7 @@ void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
iounmap(p);
p = NULL;
}
global_flush_tlb();
}
return p;
......@@ -226,6 +227,7 @@ void iounmap(void *addr)
change_page_attr(virt_to_page(__va(p->phys_addr)),
p->size >> PAGE_SHIFT,
PAGE_KERNEL);
global_flush_tlb();
}
kfree(p);
}
/*
* Code extracted from
* linux/kernel/hd.c
*
* Copyright (C) 1991-1998 Linus Torvalds
*
* devfs support - jj, rgooch, 980122
*
* Moved partition checking code to fs/partitions* - Russell King
* (linux@arm.uk.linux.org)
*/
/*
* TODO: rip out the remaining init crap from this file --hch
* gendisk handling
*/
#include <linux/config.h>
......@@ -29,8 +17,9 @@
static struct subsystem block_subsys;
#define MAX_PROBE_HASH 23 /* random */
struct blk_probe {
static struct blk_probe {
struct blk_probe *next;
dev_t dev;
unsigned long range;
......@@ -38,21 +27,27 @@ struct blk_probe {
struct gendisk *(*get)(dev_t dev, int *part, void *data);
int (*lock)(dev_t, void *);
void *data;
} *probes[MAX_BLKDEV];
} *probes[MAX_PROBE_HASH];
/* index in the above */
/* index in the above - for now: assume no multimajor ranges */
static inline int dev_to_index(dev_t dev)
{
return MAJOR(dev);
return MAJOR(dev) % MAX_PROBE_HASH;
}
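With MAX_PROBE_HASH at 23, distinct majors can land in the same bucket, which is why each bucket is a chain. A worked example (illustrative values only):

	int a = dev_to_index(MKDEV(3, 0));	/*  3 % 23 == 3 */
	int b = dev_to_index(MKDEV(26, 0));	/* 26 % 23 == 3: same chain as major 3 */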
/*
* Register device numbers dev..(dev+range-1)
* range must be nonzero
* The hash chain is sorted on range, so that subranges can override.
*/
void blk_register_region(dev_t dev, unsigned long range, struct module *module,
struct gendisk *(*probe)(dev_t, int *, void *),
int (*lock)(dev_t, void *), void *data)
struct gendisk *(*probe)(dev_t, int *, void *),
int (*lock)(dev_t, void *), void *data)
{
int index = dev_to_index(dev);
struct blk_probe *p = kmalloc(sizeof(struct blk_probe), GFP_KERNEL);
struct blk_probe **s;
p->owner = module;
p->get = probe;
p->lock = lock;
......@@ -71,6 +66,7 @@ void blk_unregister_region(dev_t dev, unsigned long range)
{
int index = dev_to_index(dev);
struct blk_probe **s;
down_write(&block_subsys.rwsem);
for (s = &probes[index]; *s; s = &(*s)->next) {
struct blk_probe *p = *s;
......@@ -94,6 +90,7 @@ static struct gendisk *exact_match(dev_t dev, int *part, void *data)
static int exact_lock(dev_t dev, void *data)
{
struct gendisk *p = data;
if (!get_disk(p))
return -1;
return 0;
......@@ -109,14 +106,14 @@ static int exact_lock(dev_t dev, void *data)
void add_disk(struct gendisk *disk)
{
disk->flags |= GENHD_FL_UP;
blk_register_region(MKDEV(disk->major, disk->first_minor), disk->minors,
NULL, exact_match, exact_lock, disk);
blk_register_region(MKDEV(disk->major, disk->first_minor),
disk->minors, NULL, exact_match, exact_lock, disk);
register_disk(disk);
elv_register_queue(disk);
}
EXPORT_SYMBOL(add_disk);
EXPORT_SYMBOL(del_gendisk);
EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
void unlink_gendisk(struct gendisk *disk)
{
......@@ -146,18 +143,17 @@ get_gendisk(dev_t dev, int *part)
struct gendisk *(*probe)(dev_t, int *, void *);
struct module *owner;
void *data;
if (p->dev > dev || p->dev + p->range <= dev)
if (p->dev > dev || p->dev + p->range - 1 < dev)
continue;
if (p->range >= best) {
up_read(&block_subsys.rwsem);
return NULL;
}
if (p->range - 1 >= best)
break;
if (!try_module_get(p->owner))
continue;
owner = p->owner;
data = p->data;
probe = p->get;
best = p->range;
best = p->range - 1;
*part = dev - p->dev;
if (p->lock && p->lock(dev, data) < 0) {
module_put(owner);
......@@ -169,7 +165,7 @@ get_gendisk(dev_t dev, int *part)
module_put(owner);
if (disk)
return disk;
goto retry;
goto retry; /* this terminates: best decreases */
}
up_read(&block_subsys.rwsem);
return NULL;
......@@ -245,7 +241,7 @@ extern int blk_dev_init(void);
static struct gendisk *base_probe(dev_t dev, int *part, void *data)
{
char name[20];
char name[30];
sprintf(name, "block-major-%d", MAJOR(dev));
request_module(name);
return NULL;
......@@ -256,11 +252,11 @@ int __init device_init(void)
struct blk_probe *base = kmalloc(sizeof(struct blk_probe), GFP_KERNEL);
int i;
memset(base, 0, sizeof(struct blk_probe));
base->dev = MKDEV(1,0);
base->range = MKDEV(MAX_BLKDEV-1, 255) - base->dev + 1;
base->dev = 1;
base->range = ~0; /* range 1 .. ~0 */
base->get = base_probe;
for (i = 1; i < MAX_BLKDEV; i++)
probes[i] = base;
for (i = 0; i < MAX_PROBE_HASH; i++)
probes[i] = base; /* must remain last in chain */
blk_dev_init();
subsystem_register(&block_subsys);
return 0;
......@@ -281,12 +277,14 @@ struct disk_attribute {
ssize_t (*show)(struct gendisk *, char *);
};
static ssize_t disk_attr_show(struct kobject * kobj, struct attribute * attr,
char * page)
static ssize_t disk_attr_show(struct kobject *kobj, struct attribute *attr,
char *page)
{
struct gendisk * disk = to_disk(kobj);
struct disk_attribute * disk_attr = container_of(attr,struct disk_attribute,attr);
struct gendisk *disk = to_disk(kobj);
struct disk_attribute *disk_attr =
container_of(attr,struct disk_attribute,attr);
ssize_t ret = 0;
if (disk_attr->show)
ret = disk_attr->show(disk,page);
return ret;
......@@ -303,11 +301,11 @@ static ssize_t disk_dev_read(struct gendisk * disk, char *page)
}
static ssize_t disk_range_read(struct gendisk * disk, char *page)
{
return sprintf(page, "%d\n",disk->minors);
return sprintf(page, "%d\n", disk->minors);
}
static ssize_t disk_size_read(struct gendisk * disk, char *page)
{
return sprintf(page, "%llu\n",(unsigned long long)get_capacity(disk));
return sprintf(page, "%llu\n", (unsigned long long)get_capacity(disk));
}
static inline unsigned jiffies_to_msec(unsigned jif)
......
......@@ -1461,6 +1461,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
if (blk_rq_tagged(rq))
blk_queue_end_tag(q, rq);
drive_stat_acct(rq, rq->nr_sectors, 1);
__elv_add_request(q, rq, !at_head, 0);
q->request_fn(q);
spin_unlock_irqrestore(q->queue_lock, flags);
......
......@@ -60,6 +60,7 @@ int blk_do_rq(request_queue_t *q, struct block_device *bdev, struct request *rq)
rq->flags |= REQ_NOMERGE;
rq->waiting = &wait;
drive_stat_acct(rq, rq->nr_sectors, 1);
elv_add_request(q, rq, 1, 1);
generic_unplug_device(q);
wait_for_completion(&wait);
......
......@@ -119,7 +119,7 @@ static ctl_table raid_root_table[] = {
.procname = "dev",
.maxlen = 0,
.mode = 0555,
.proc_handler = raid_dir_table,
.child = raid_dir_table,
},
{ .ctl_name = 0 }
};
......
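The md fix above matters because ctl_table keeps two very different pointers: .proc_handler is a callback on leaf entries, while .child links a directory entry to its sub-table, so pointing .proc_handler at a table corrupts the sysctl tree walk. A sketch of the intended shape (names and the ctl_name value are illustrative):

	static ctl_table example_dir_table[] = {
		/* leaf entries with .proc_handler would live here */
		{ .ctl_name = 0 }
	};

	static ctl_table example_root_table[] = {
		{
			.ctl_name	= CTL_DEV,		/* illustrative */
			.procname	= "dev",
			.mode		= 0555,
			.child		= example_dir_table,	/* directory, not handler */
		},
		{ .ctl_name = 0 }
	};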
......@@ -151,7 +151,7 @@ static int irda_thread(void *startup)
while (irda_rq_queue.thread != NULL) {
set_task_state(current, TASK_UNINTERRUPTIBLE);
set_task_state(current, TASK_INTERRUPTIBLE);
add_wait_queue(&irda_rq_queue.kick, &wait);
if (list_empty(&irda_rq_queue.request_list))
schedule();
......
......@@ -1004,8 +1004,11 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
*/
if (unlikely(move_count != dentry->d_move_count))
break;
if (!d_unhashed(dentry))
found = dget(dentry);
if (!d_unhashed(dentry)) {
atomic_inc(&dentry->d_count);
dentry->d_vfs_flags |= DCACHE_REFERENCED;
found = dentry;
}
spin_unlock(&dentry->d_lock);
break;
}
......
......@@ -33,12 +33,17 @@ static unsigned char ext3_filetype_table[] = {
static int ext3_readdir(struct file *, void *, filldir_t);
static int ext3_dx_readdir(struct file * filp,
void * dirent, filldir_t filldir);
static int ext3_release_dir (struct inode * inode,
struct file * filp);
struct file_operations ext3_dir_operations = {
.read = generic_read_dir,
.readdir = ext3_readdir, /* we take BKL. needed?*/
.ioctl = ext3_ioctl, /* BKL held */
.fsync = ext3_sync_file, /* BKL held */
#ifdef CONFIG_EXT3_INDEX
.release = ext3_release_dir,
#endif
};
......@@ -275,7 +280,11 @@ static void free_rb_tree_fname(struct rb_root *root)
*/
parent = n->rb_parent;
fname = rb_entry(n, struct fname, rb_hash);
kfree(fname);
while (fname) {
struct fname * old = fname;
fname = fname->next;
kfree (old);
}
if (!parent)
root->rb_node = 0;
else if (parent->rb_left == n)
......@@ -481,4 +490,13 @@ static int ext3_dx_readdir(struct file * filp,
UPDATE_ATIME(inode);
return 0;
}
static int ext3_release_dir (struct inode * inode, struct file * filp)
{
if (is_dx(inode) && filp->private_data)
ext3_htree_free_dir_info(filp->private_data);
return 0;
}
#endif
......@@ -55,29 +55,61 @@ static int ext3_open_file (struct inode * inode, struct file * filp)
return 0;
}
/*
* ext3_file_write().
*
* Most things are done in ext3_prepare_write() and ext3_commit_write().
*/
static ssize_t
ext3_file_write(struct kiocb *iocb, const char *buf, size_t count, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode;
int ret, err;
ret = generic_file_aio_write(iocb, buf, count, pos);
/*
* Nasty: if the file is subject to synchronous writes then we need
* to force generic_osync_inode() to call ext3_write_inode().
* We do that by marking the inode dirty. This adds much more
* computational expense than we need, but we're going to sync
* anyway.
* Skip flushing if there was an error, or if nothing was written.
*/
if (ret <= 0)
return ret;
/*
* If the inode is IS_SYNC, or is O_SYNC and we are doing data
* journalling then we need to make sure that we force the transaction
* to disk to keep all metadata uptodate synchronously.
*/
if (IS_SYNC(inode) || (file->f_flags & O_SYNC))
mark_inode_dirty(inode);
if (file->f_flags & O_SYNC) {
/*
* If we are non-data-journaled, then the dirty data has
* already been flushed to backing store by generic_osync_inode,
* and the inode has been flushed too if there have been any
* modifications other than mere timestamp updates.
*
* Open question --- do we care about flushing timestamps too
* if the inode is IS_SYNC?
*/
if (!ext3_should_journal_data(inode))
return ret;
goto force_commit;
}
return generic_file_aio_write(iocb, buf, count, pos);
/*
* So we know that there has been no forced data flush. If the inode
* is marked IS_SYNC, we need to force one ourselves.
*/
if (!IS_SYNC(inode))
return ret;
/*
* Open question #2 --- should we force data to disk here too? If we
* don't, the only impact is that data=writeback filesystems won't
* flush data to disk automatically on IS_SYNC, only metadata (but
* historically, that is what ext2 has done.)
*/
force_commit:
err = ext3_force_commit(inode->i_sb);
if (err)
return err;
return ret;
}
struct file_operations ext3_file_operations = {
......
......@@ -80,6 +80,16 @@ static LIST_HEAD(anon_hash_chain); /* for inodes with NULL i_sb */
*/
spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
/*
* iprune_sem provides exclusion between the kswapd or try_to_free_pages
* icache shrinking path, and the umount path. Without this exclusion,
* by the time prune_icache calls iput for the inode whose pages it has
* been invalidating, or by the time it calls clear_inode & destroy_inode
* from its final dispose_list, the struct super_block they refer to
* (for inode->i_sb->s_op) may already have been freed and reused.
*/
static DECLARE_MUTEX(iprune_sem);
/*
* Statistics gathering..
*/
......@@ -320,6 +330,7 @@ int invalidate_inodes(struct super_block * sb)
int busy;
LIST_HEAD(throw_away);
down(&iprune_sem);
spin_lock(&inode_lock);
busy = invalidate_list(&inode_in_use, sb, &throw_away);
busy |= invalidate_list(&inode_unused, sb, &throw_away);
......@@ -328,6 +339,7 @@ int invalidate_inodes(struct super_block * sb)
spin_unlock(&inode_lock);
dispose_list(&throw_away);
up(&iprune_sem);
return busy;
}
......@@ -395,6 +407,7 @@ static void prune_icache(int nr_to_scan)
int nr_scanned;
unsigned long reap = 0;
down(&iprune_sem);
spin_lock(&inode_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
......@@ -429,7 +442,10 @@ static void prune_icache(int nr_to_scan)
}
inodes_stat.nr_unused -= nr_pruned;
spin_unlock(&inode_lock);
dispose_list(&freeable);
up(&iprune_sem);
if (current_is_kswapd())
mod_page_state(kswapd_inodesteal, reap);
else
......
......@@ -358,7 +358,8 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
nice,
0UL /* removed */,
jiffies_to_clock_t(task->it_real_value),
(unsigned long long) jiffies_64_to_clock_t(task->start_time),
(unsigned long long)
jiffies_64_to_clock_t(task->start_time - INITIAL_JIFFIES),
vsize,
mm ? mm->rss : 0, /* you might want to shift this left 3 */
task->rlim[RLIMIT_RSS].rlim_cur,
......
......@@ -104,7 +104,7 @@ static int uptime_read_proc(char *page, char **start, off_t off,
unsigned long uptime_remainder;
int len;
uptime = get_jiffies_64();
uptime = get_jiffies_64() - INITIAL_JIFFIES;
uptime_remainder = (unsigned long) do_div(uptime, HZ);
#if HZ!=100
......@@ -320,7 +320,7 @@ static int kstat_read_proc(char *page, char **start, off_t off,
{
int i, len;
extern unsigned long total_forks;
u64 jif = get_jiffies_64();
u64 jif = get_jiffies_64() - INITIAL_JIFFIES;
unsigned int sum = 0, user = 0, nice = 0, system = 0, idle = 0, iowait = 0;
for (i = 0 ; i < NR_CPUS; i++) {
......
......@@ -10,7 +10,7 @@
((phys_apic) & (~0xf)) )
#endif
#define no_balance_irq (1)
#define NO_BALANCE_IRQ (1)
#define esr_disable (1)
static inline int apic_id_registered(void)
......
......@@ -9,7 +9,7 @@
#define TARGET_CPUS 0x01
#endif
#define no_balance_irq (0)
#define NO_BALANCE_IRQ (0)
#define esr_disable (0)
#define INT_DELIVERY_MODE dest_LowestPrio
......
......@@ -5,7 +5,7 @@
#define TARGET_CPUS (0xf)
#define no_balance_irq (1)
#define NO_BALANCE_IRQ (1)
#define esr_disable (1)
#define INT_DELIVERY_MODE dest_LowestPrio
......
......@@ -4,7 +4,7 @@
extern int x86_summit;
#define esr_disable (x86_summit ? 1 : 0)
#define no_balance_irq (0)
#define NO_BALANCE_IRQ (0)
#define XAPIC_DEST_CPUS_MASK 0x0Fu
#define XAPIC_DEST_CLUSTER_MASK 0xF0u
......
......@@ -12,6 +12,8 @@
#ifdef CONFIG_X86_NUMAQ
#include <asm/numaq.h>
#elif CONFIG_X86_SUMMIT
#include <asm/srat.h>
#else
#define pfn_to_nid(pfn) (0)
#endif /* CONFIG_X86_NUMAQ */
......
......@@ -168,6 +168,10 @@ struct sys_cfg_data {
struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */
};
static inline unsigned long get_zholes_size(int nid)
{
return 0;
}
#endif /* CONFIG_X86_NUMAQ */
#endif /* NUMAQ_H */
......@@ -5,6 +5,8 @@
#ifdef CONFIG_X86_NUMAQ
#include <asm/numaq.h>
#elif CONFIG_X86_SUMMIT
#include <asm/srat.h>
#else
#define MAX_NUMNODES 1
#endif /* CONFIG_X86_NUMAQ */
......
......@@ -49,6 +49,7 @@ typedef struct { unsigned long long pgd; } pgd_t;
typedef struct { unsigned long pte_low; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
#define boot_pte_t pte_t /* or would you rather have a typedef */
#define pte_val(x) ((x).pte_low)
#define HPAGE_SHIFT 22
#endif
......
/*
* Some of the code in this file has been gleaned from the 64 bit
* discontigmem support code base.
*
* Copyright (C) 2002, IBM Corp.
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to Pat Gaughen <gone@us.ibm.com>
*/
#ifndef _ASM_SRAT_H_
#define _ASM_SRAT_H_
/*
* each element in pfnnode_map represents 256 MB (2^28 bytes) of memory.
* so, to represent 64GB we need 256 elements.
*/
#define MAX_ELEMENTS 256
#define PFN_TO_ELEMENT(pfn) ((pfn)>>(28 - PAGE_SHIFT))
extern int pfnnode_map[];
#define pfn_to_nid(pfn) ({ pfnnode_map[PFN_TO_ELEMENT(pfn)]; })
#define pfn_to_pgdat(pfn) NODE_DATA(pfn_to_nid(pfn))
#define PHYSADDR_TO_NID(pa) pfn_to_nid(pa >> PAGE_SHIFT)
#define MAX_NUMNODES 8
extern void get_memcfg_from_srat(void);
extern unsigned long *get_zholes_size(int);
#define get_memcfg_numa() get_memcfg_from_srat()
#endif /* _ASM_SRAT_H_ */
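The granularity arithmetic: with 4 KB pages (PAGE_SHIFT == 12) the shift is 28 - 12 = 16, so each pfnnode_map element covers 2^16 pages = 256 MB, and MAX_ELEMENTS (256) of them span the advertised 64 GB. A worked example:

	/* pfn 0x10000 is the first page of the second 256 MB chunk */
	int elem = PFN_TO_ELEMENT(0x10000);	/* 0x10000 >> 16 == 1 */
	int nid  = pfn_to_nid(0x10000);		/* node owning element 1 */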
......@@ -79,7 +79,7 @@ typedef struct {
struct acpi_table_rsdt {
struct acpi_table_header header;
u32 entry[1];
u32 entry[8];
} __attribute__ ((packed));
/* Extended System Description Table (XSDT) */
......
......@@ -262,6 +262,8 @@ extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
static __inline__ struct dentry * dget(struct dentry *dentry)
{
if (dentry) {
if (!atomic_read(&dentry->d_count))
BUG();
atomic_inc(&dentry->d_count);
dentry->d_vfs_flags |= DCACHE_REFERENCED;
}
......
......@@ -232,11 +232,15 @@ static inline void get_page(struct page *page)
static inline void put_page(struct page *page)
{
if (PageCompound(page)) {
page = (struct page *)page->lru.next;
if (page->lru.prev) { /* destructor? */
(*(void (*)(struct page *))page->lru.prev)(page);
return;
if (put_page_testzero(page)) {
page = (struct page *)page->lru.next;
if (page->lru.prev) { /* destructor? */
(*(void (*)(struct page *))page->lru.prev)(page);
} else {
__page_cache_release(page);
}
}
return;
}
if (!PageReserved(page) && put_page_testzero(page))
__page_cache_release(page);
......
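The hugetlb hunks earlier stash free_huge_page in page->lru.prev of the head page; the put_page() fix above makes that destructor run only when the compound page's count actually reaches zero, rather than on every put. A minimal sketch of the convention, assuming the tail pages' lru.next already point back at the head as put_page() expects:

	static void example_destructor(struct page *head)
	{
		/* return the whole huge page to a private free list */
	}

	static void example_setup(struct page *head)
	{
		set_page_count(head, 1);
		head->lru.prev = (void *)example_destructor;	/* called at last put */
	}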
......@@ -27,6 +27,12 @@ struct timezone {
#include <linux/spinlock.h>
#include <linux/seqlock.h>
/*
* Have the 32 bit jiffies value wrap 5 minutes after boot
* so jiffies wrap bugs show up earlier.
*/
#define INITIAL_JIFFIES ((unsigned int) (-300*HZ))
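INITIAL_JIFFIES is (unsigned int)(-300*HZ), i.e. 2^32 - 300*HZ; counting up at HZ ticks per second, the 32-bit value reaches the wrap 300 seconds after boot, whatever HZ is. The class of bug this flushes out, in a short sketch (time_after() is the existing wrap-safe comparison macro):

	unsigned long timeout = jiffies + 10 * HZ;	/* may arithmetically wrap */

	if (time_after(jiffies, timeout))		/* wrap-safe: signed difference */
		printk("timed out\n");
	/* a raw "jiffies > timeout" misfires near the wrap point,
	 * which INITIAL_JIFFIES now forces early in every boot */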
/*
* Change timeval to jiffies, trying to avoid the
* most obvious overflows..
......
......@@ -870,7 +870,7 @@ asmlinkage long sys_times(struct tms * tbuf)
if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
return -EFAULT;
}
return jiffies_to_clock_t(jiffies);
return (long) jiffies_64_to_clock_t(get_jiffies_64());
}
/*
......
......@@ -757,7 +757,7 @@ static inline void calc_load(unsigned long ticks)
}
/* jiffies at the most recent update of wall time */
unsigned long wall_jiffies;
unsigned long wall_jiffies = INITIAL_JIFFIES;
/*
* This read-write spinlock protects us from races in SMP while
......@@ -1104,7 +1104,7 @@ asmlinkage long sys_sysinfo(struct sysinfo *info)
do {
seq = read_seqbegin(&xtime_lock);
uptime = jiffies_64;
uptime = jiffies_64 - INITIAL_JIFFIES;
do_div(uptime, HZ);
val.uptime = (unsigned long) uptime;
......@@ -1180,6 +1180,13 @@ static void __devinit init_timers_cpu(int cpu)
}
for (j = 0; j < TVR_SIZE; j++)
INIT_LIST_HEAD(base->tv1.vec + j);
base->timer_jiffies = INITIAL_JIFFIES;
base->tv1.index = INITIAL_JIFFIES & TVR_MASK;
base->tv2.index = (INITIAL_JIFFIES >> TVR_BITS) & TVN_MASK;
base->tv3.index = (INITIAL_JIFFIES >> (TVR_BITS+TVN_BITS)) & TVN_MASK;
base->tv4.index = (INITIAL_JIFFIES >> (TVR_BITS+2*TVN_BITS)) & TVN_MASK;
base->tv5.index = (INITIAL_JIFFIES >> (TVR_BITS+3*TVN_BITS)) & TVN_MASK;
}
static int __devinit timer_cpu_notify(struct notifier_block *self,
......
......@@ -90,19 +90,16 @@ u32 attribute((pure)) crc32_le(u32 crc, unsigned char const *p, size_t len)
const u32 *tab = crc32table_le;
# ifdef __LITTLE_ENDIAN
# define DO_CRC crc = (crc>>8) ^ tab[ crc & 255 ]
# define ENDIAN_SHIFT 0
# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
# else
# define DO_CRC crc = (crc<<8) ^ tab[ crc >> 24 ]
# define ENDIAN_SHIFT 24
# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
# endif
crc = __cpu_to_le32(crc);
/* Align it */
if(unlikely(((long)b)&3 && len)){
do {
crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
DO_CRC;
DO_CRC(*((u8 *)b)++);
} while ((--len) && ((long)b)&3 );
}
if(likely(len >= 4)){
......@@ -112,10 +109,10 @@ u32 attribute((pure)) crc32_le(u32 crc, unsigned char const *p, size_t len)
--b; /* use pre increment below(*++b) for speed */
do {
crc ^= *++b;
DO_CRC;
DO_CRC;
DO_CRC;
DO_CRC;
DO_CRC(0);
DO_CRC(0);
DO_CRC(0);
DO_CRC(0);
} while (--len);
b++; /* point to next byte(s) */
len = save_len;
......@@ -123,8 +120,7 @@ u32 attribute((pure)) crc32_le(u32 crc, unsigned char const *p, size_t len)
/* And the last few bytes */
if(len){
do {
crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
DO_CRC;
DO_CRC(*((u8 *)b)++);
} while (--len);
}
......@@ -195,19 +191,16 @@ u32 attribute((pure)) crc32_be(u32 crc, unsigned char const *p, size_t len)
const u32 *tab = crc32table_be;
# ifdef __LITTLE_ENDIAN
# define DO_CRC crc = (crc>>8) ^ tab[ crc & 255 ]
# define ENDIAN_SHIFT 24
# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
# else
# define DO_CRC crc = (crc<<8) ^ tab[ crc >> 24 ]
# define ENDIAN_SHIFT 0
# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
# endif
crc = __cpu_to_be32(crc);
/* Align it */
if(unlikely(((long)b)&3 && len)){
do {
crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
DO_CRC;
DO_CRC(*((u8 *)b)++);
} while ((--len) && ((long)b)&3 );
}
if(likely(len >= 4)){
......@@ -217,10 +210,10 @@ u32 attribute((pure)) crc32_be(u32 crc, unsigned char const *p, size_t len)
--b; /* use pre increment below(*++b) for speed */
do {
crc ^= *++b;
DO_CRC;
DO_CRC;
DO_CRC;
DO_CRC;
DO_CRC(0);
DO_CRC(0);
DO_CRC(0);
DO_CRC(0);
} while (--len);
b++; /* point to next byte(s) */
len = save_len;
......@@ -228,8 +221,7 @@ u32 attribute((pure)) crc32_be(u32 crc, unsigned char const *p, size_t len)
/* And the last few bytes */
if(len){
do {
crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
DO_CRC;
DO_CRC(*((u8 *)b)++);
} while (--len);
}
return __be32_to_cpu(crc);
......
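The DO_CRC rework above folds the next input byte into the table index instead of xor-ing it into crc beforehand, which removes the ENDIAN_SHIFT fixup and lets the aligned word loop pass literal zeros. The little-endian byte step that DO_CRC(x) now encodes, as a standalone sketch:

	/* One byte-at-a-time step of table-driven little-endian CRC-32. */
	static u32 crc32_le_step(u32 crc, u8 byte, const u32 *tab)
	{
		return tab[(crc ^ byte) & 255] ^ (crc >> 8);
	}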
......@@ -8,8 +8,12 @@
/* How many bits at a time to use. Requires a table of 4<<CRC_xx_BITS bytes. */
/* For less performance-sensitive, use 4 */
#define CRC_LE_BITS 8
#define CRC_BE_BITS 8
#ifndef CRC_LE_BITS
# define CRC_LE_BITS 8
#endif
#ifndef CRC_BE_BITS
# define CRC_BE_BITS 8
#endif
/*
* Little-endian CRC computation. Used with serial bit streams sent
......
......@@ -559,21 +559,12 @@ void do_generic_mapping_read(struct address_space *mapping,
page_cache_readahead(mapping, ra, filp, index);
nr = nr - offset;
/*
* Try to find the data in the page cache..
*/
find_page:
read_lock(&mapping->page_lock);
page = radix_tree_lookup(&mapping->page_tree, index);
if (!page) {
read_unlock(&mapping->page_lock);
handle_ra_miss(mapping,ra);
page = find_get_page(mapping, index);
if (unlikely(page == NULL)) {
handle_ra_miss(mapping, ra);
goto no_cached_page;
}
page_cache_get(page);
read_unlock(&mapping->page_lock);
if (!PageUptodate(page))
goto page_not_up_to_date;
page_ok:
......
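find_get_page() bundles exactly the locking dance the deleted block open-coded. In outline (a sketch of the behavior in this tree, not the verbatim implementation):

	struct page *find_get_page(struct address_space *mapping, unsigned long index)
	{
		struct page *page;

		read_lock(&mapping->page_lock);
		page = radix_tree_lookup(&mapping->page_tree, index);
		if (page)
			page_cache_get(page);	/* take the reference under the lock */
		read_unlock(&mapping->page_lock);
		return page;
	}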
......@@ -158,9 +158,7 @@ pte_t * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
pmd_populate(mm, pmd, new);
}
out:
if (pmd_present(*pmd))
return pte_offset_map(pmd, address);
return NULL;
return pte_offset_map(pmd, address);
}
pte_t * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
......
......@@ -61,6 +61,9 @@ static int badness(struct task_struct *p)
if (!p->mm)
return 0;
if (p->flags & PF_MEMDIE)
return 0;
/*
* The memory size of the process is the basis for the badness.
*/
......
......@@ -1643,7 +1643,7 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
if (cachep->ctor && cachep->flags & SLAB_POISON) {
unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR;
if (!flags & __GFP_WAIT)
if (!(flags & __GFP_WAIT))
ctor_flags |= SLAB_CTOR_ATOMIC;
cachep->ctor(objp, cachep, ctor_flags);
......@@ -2064,7 +2064,7 @@ static void enable_cpucache (kmem_cache_t *cachep)
else
limit = 248;
#ifndef DEBUG
#if DEBUG
/* With debugging enabled, large batchcount lead to excessively
* long periods with disabled local interrupts. Limit the
* batchcount
......
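The first slab hunk fixes a classic precedence bug: ! binds tighter than &, so the old test parsed as (!flags) & __GFP_WAIT, which is 0 whenever flags is nonzero. A two-line demonstration:

	int wrong = !flags & __GFP_WAIT;	/* (!flags) & mask -> always 0 here */
	int right = !(flags & __GFP_WAIT);	/* 1 only when __GFP_WAIT is clear */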