Commit 6199faa1 authored by Linus Torvalds

Merge http://lia64.bkbits.net/to-linus-2.5

into home.osdl.org:/home/torvalds/v2.5/linux
parents 6031e508 c2a5f235
...@@ -220,24 +220,8 @@ config NUMA ...@@ -220,24 +220,8 @@ config NUMA
Access). This option is for configuring high-end multiprocessor Access). This option is for configuring high-end multiprocessor
server systems. If in doubt, say N. server systems. If in doubt, say N.
choice
prompt "Maximum Memory per NUMA Node" if NUMA && IA64_DIG
depends on NUMA && IA64_DIG
default IA64_NODESIZE_16GB
config IA64_NODESIZE_16GB
bool "16GB"
config IA64_NODESIZE_64GB
bool "64GB"
config IA64_NODESIZE_256GB
bool "256GB"
endchoice
config DISCONTIGMEM config DISCONTIGMEM
bool "Discontiguous memory support" if (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC) && NUMA bool "Discontiguous memory support" if (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC) && NUMA && VIRTUAL_MEM_MAP
default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA
help help
Say Y to support efficient handling of discontiguous physical memory, Say Y to support efficient handling of discontiguous physical memory,
...@@ -250,14 +234,10 @@ config VIRTUAL_MEM_MAP ...@@ -250,14 +234,10 @@ config VIRTUAL_MEM_MAP
default y if !IA64_HP_SIM default y if !IA64_HP_SIM
help help
Say Y to compile the kernel with support for a virtual mem map. Say Y to compile the kernel with support for a virtual mem map.
This is an alternate method of supporting large holes in the This code also only takes effect if a memory hole of greater than
physical address space on non NUMA machines. Since the DISCONTIGMEM 1 Gb is found during boot. You must turn this option on if you
option is not supported on machines with the ZX1 chipset, this is require the DISCONTIGMEM option for your machine. If you are
the only way of supporting more than 1 Gb of memory on those unsure, say Y.
machines. This code also only takes effect if a memory hole of
greater than 1 Gb is found during boot, so it is safe to enable
unless you require the DISCONTIGMEM option for your machine. If you
are unsure, say Y.
config IA64_MCA config IA64_MCA
bool "Enable IA-64 Machine Check Abort" bool "Enable IA-64 Machine Check Abort"
......
...@@ -64,7 +64,7 @@ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ ...@@ -64,7 +64,7 @@ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_PCI) += arch/ia64/pci/
drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/
drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/ drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
boot := arch/ia64/hp/sim/boot boot := arch/ia64/hp/sim/boot
......
...@@ -2486,11 +2486,14 @@ static int ...@@ -2486,11 +2486,14 @@ static int
putstat64 (struct stat64 *ubuf, struct kstat *kbuf) putstat64 (struct stat64 *ubuf, struct kstat *kbuf)
{ {
int err; int err;
u64 hdev;
if (clear_user(ubuf, sizeof(*ubuf))) if (clear_user(ubuf, sizeof(*ubuf)))
return -EFAULT; return -EFAULT;
err = __put_user(huge_encode_dev(kbuf->dev), &ubuf->st_dev); hdev = huge_encode_dev(kbuf->dev);
err = __put_user(hdev, (u32*)&ubuf->st_dev);
err |= __put_user(hdev >> 32, ((u32*)&ubuf->st_dev) + 1);
err |= __put_user(kbuf->ino, &ubuf->__st_ino); err |= __put_user(kbuf->ino, &ubuf->__st_ino);
err |= __put_user(kbuf->ino, &ubuf->st_ino_lo); err |= __put_user(kbuf->ino, &ubuf->st_ino_lo);
err |= __put_user(kbuf->ino >> 32, &ubuf->st_ino_hi); err |= __put_user(kbuf->ino >> 32, &ubuf->st_ino_hi);
...@@ -2498,7 +2501,9 @@ putstat64 (struct stat64 *ubuf, struct kstat *kbuf) ...@@ -2498,7 +2501,9 @@ putstat64 (struct stat64 *ubuf, struct kstat *kbuf)
err |= __put_user(kbuf->nlink, &ubuf->st_nlink); err |= __put_user(kbuf->nlink, &ubuf->st_nlink);
err |= __put_user(kbuf->uid, &ubuf->st_uid); err |= __put_user(kbuf->uid, &ubuf->st_uid);
err |= __put_user(kbuf->gid, &ubuf->st_gid); err |= __put_user(kbuf->gid, &ubuf->st_gid);
err |= __put_user(huge_encode_dev(kbuf->rdev), &ubuf->st_rdev); hdev = huge_encode_dev(kbuf->rdev);
err = __put_user(hdev, (u32*)&ubuf->st_rdev);
err |= __put_user(hdev >> 32, ((u32*)&ubuf->st_rdev) + 1);
err |= __put_user(kbuf->size, &ubuf->st_size_lo); err |= __put_user(kbuf->size, &ubuf->st_size_lo);
err |= __put_user((kbuf->size >> 32), &ubuf->st_size_hi); err |= __put_user((kbuf->size >> 32), &ubuf->st_size_hi);
err |= __put_user(kbuf->atime.tv_sec, &ubuf->st_atime); err |= __put_user(kbuf->atime.tv_sec, &ubuf->st_atime);
...@@ -2724,8 +2729,8 @@ sys32_open (const char * filename, int flags, int mode) ...@@ -2724,8 +2729,8 @@ sys32_open (const char * filename, int flags, int mode)
struct epoll_event32 struct epoll_event32
{ {
u32 events; u32 events;
u64 data; u32 data[2];
} __attribute__((packed)); };
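Aside (not part of the patch): both the old packed layout and the new two-word layout describe the same 12-byte ia32 structure with data at offset 4, so the change is presumably about dropping the packed 64-bit member rather than about the ABI; the two halves can then be handled as naturally aligned 32-bit words instead of through (u32 *) casts. A minimal standalone check of that reading:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t u32;
typedef uint64_t u64;

struct epoll_event32_old {		/* layout before this patch */
	u32 events;
	u64 data;
} __attribute__((packed));

struct epoll_event32_new {		/* layout after this patch */
	u32 events;
	u32 data[2];
};

int main(void)
{
	/* Same size and same offset of 'data' in both versions. */
	assert(sizeof(struct epoll_event32_old) == 12);
	assert(sizeof(struct epoll_event32_new) == 12);
	assert(offsetof(struct epoll_event32_old, data) == 4);
	assert(offsetof(struct epoll_event32_new, data) == 4);

	/* Reassembling the 64-bit value from the two halves, the way
	 * sys32_epoll_ctl() does below. */
	struct epoll_event32_new ev = { .events = 0, .data = { 0x89abcdefu, 0x01234567u } };
	u64 data64 = ev.data[0] | ((u64)ev.data[1] << 32);
	assert(data64 == 0x0123456789abcdefULL);
	return 0;
}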
asmlinkage long asmlinkage long
sys32_epoll_ctl(int epfd, int op, int fd, struct epoll_event32 *event) sys32_epoll_ctl(int epfd, int op, int fd, struct epoll_event32 *event)
...@@ -2740,10 +2745,10 @@ sys32_epoll_ctl(int epfd, int op, int fd, struct epoll_event32 *event) ...@@ -2740,10 +2745,10 @@ sys32_epoll_ctl(int epfd, int op, int fd, struct epoll_event32 *event)
return error; return error;
__get_user(event64.events, &event->events); __get_user(event64.events, &event->events);
__get_user(data_halfword, (u32*)(&event->data)); __get_user(data_halfword, &event->data[0]);
event64.data = data_halfword; event64.data = data_halfword;
__get_user(data_halfword, ((u32*)(&event->data) + 1)); __get_user(data_halfword, &event->data[1]);
event64.data |= ((u64)data_halfword) << 32; event64.data |= (u64)data_halfword << 32;
set_fs(KERNEL_DS); set_fs(KERNEL_DS);
error = sys_epoll_ctl(epfd, op, fd, &event64); error = sys_epoll_ctl(epfd, op, fd, &event64);
...@@ -2758,8 +2763,9 @@ sys32_epoll_wait(int epfd, struct epoll_event32 *events, int maxevents, ...@@ -2758,8 +2763,9 @@ sys32_epoll_wait(int epfd, struct epoll_event32 *events, int maxevents,
{ {
struct epoll_event *events64 = NULL; struct epoll_event *events64 = NULL;
mm_segment_t old_fs = get_fs(); mm_segment_t old_fs = get_fs();
int error; int error, numevents, size;
int evt_idx; int evt_idx;
int do_free_pages = 0;
if (maxevents <= 0) { if (maxevents <= 0) {
return -EINVAL; return -EINVAL;
...@@ -2770,43 +2776,45 @@ sys32_epoll_wait(int epfd, struct epoll_event32 *events, int maxevents, ...@@ -2770,43 +2776,45 @@ sys32_epoll_wait(int epfd, struct epoll_event32 *events, int maxevents,
maxevents * sizeof(struct epoll_event32)))) maxevents * sizeof(struct epoll_event32))))
return error; return error;
/* Allocate the space needed for the intermediate copy */ /*
events64 = kmalloc(maxevents * sizeof(struct epoll_event), GFP_KERNEL); * Allocate space for the intermediate copy. If the space needed
* is large enough to cause kmalloc to fail, then try again with
* __get_free_pages.
*/
size = maxevents * sizeof(struct epoll_event);
events64 = kmalloc(size, GFP_KERNEL);
if (events64 == NULL) { if (events64 == NULL) {
return -ENOMEM; events64 = (struct epoll_event *)
} __get_free_pages(GFP_KERNEL, get_order(size));
if (events64 == NULL)
/* Expand the 32-bit structures into the 64-bit structures */ return -ENOMEM;
for (evt_idx = 0; evt_idx < maxevents; evt_idx++) { do_free_pages = 1;
u32 data_halfword;
__get_user(events64[evt_idx].events, &events[evt_idx].events);
__get_user(data_halfword, (u32*)(&events[evt_idx].data));
events64[evt_idx].data = data_halfword;
__get_user(data_halfword, ((u32*)(&events[evt_idx].data) + 1));
events64[evt_idx].data |= ((u64)data_halfword) << 32;
} }
/* Do the system call */ /* Do the system call */
set_fs(KERNEL_DS); /* copy_to/from_user should work on kernel mem*/ set_fs(KERNEL_DS); /* copy_to/from_user should work on kernel mem*/
error = sys_epoll_wait(epfd, events64, maxevents, timeout); numevents = sys_epoll_wait(epfd, events64, maxevents, timeout);
set_fs(old_fs); set_fs(old_fs);
/* Don't modify userspace memory if we're returning an error */ /* Don't modify userspace memory if we're returning an error */
if (!error) { if (numevents > 0) {
/* Translate the 64-bit structures back into the 32-bit /* Translate the 64-bit structures back into the 32-bit
structures */ structures */
for (evt_idx = 0; evt_idx < maxevents; evt_idx++) { for (evt_idx = 0; evt_idx < numevents; evt_idx++) {
__put_user(events64[evt_idx].events, __put_user(events64[evt_idx].events,
&events[evt_idx].events); &events[evt_idx].events);
__put_user((u32)(events64[evt_idx].data), __put_user((u32)events64[evt_idx].data,
(u32*)(&events[evt_idx].data)); &events[evt_idx].data[0]);
__put_user((u32)(events64[evt_idx].data >> 32), __put_user((u32)(events64[evt_idx].data >> 32),
((u32*)(&events[evt_idx].data) + 1)); &events[evt_idx].data[1]);
} }
} }
kfree(events64); if (do_free_pages)
return error; free_pages((unsigned long) events64, get_order(size));
else
kfree(events64);
return numevents;
} }
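The fallback path above pairs kmalloc()/kfree() with __get_free_pages()/free_pages(), with do_free_pages recording which allocator was used, since page-allocator memory cannot be returned with kfree(). A standalone sketch of the order arithmetic behind the fallback; the 16KB page size (a common ia64 default, but configurable) and the 16-byte struct epoll_event are assumptions for illustration:

#include <stdio.h>

#define PAGE_SHIFT 14			/* assumed: 16KB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Smallest 'order' such that (PAGE_SIZE << order) >= size, as get_order()
 * computes in the kernel. */
static int get_order(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	/* maxevents = 2048, sizeof(struct epoll_event) assumed to be 16 */
	unsigned long size = 2048 * 16;

	/* 32KB does not fit in one 16KB page, so an order-1 (two page)
	 * allocation is requested when kmalloc() fails. */
	printf("size=%lu bytes -> order %d (%lu bytes)\n",
	       size, get_order(size), PAGE_SIZE << get_order(size));
	return 0;
}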
#ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */ #ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */
......
...@@ -380,7 +380,7 @@ acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa) ...@@ -380,7 +380,7 @@ acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
void __init void __init
acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma) acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
{ {
unsigned long paddr, size, hole_size, min_hole_size; unsigned long paddr, size;
u8 pxm; u8 pxm;
struct node_memblk_s *p, *q, *pend; struct node_memblk_s *p, *q, *pend;
...@@ -402,34 +402,6 @@ acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma) ...@@ -402,34 +402,6 @@ acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
if (!ma->flags.enabled) if (!ma->flags.enabled)
return; return;
/*
* When the chunk is not the first one in the node, check distance
* from the other chunks. When the hole is too huge ignore the chunk.
* This restriction should be removed when multiple chunks per node
* is supported.
*/
pend = &node_memblk[num_memblks];
min_hole_size = 0;
for (p = &node_memblk[0]; p < pend; p++) {
if (p->nid != pxm)
continue;
if (p->start_paddr < paddr)
hole_size = paddr - (p->start_paddr + p->size);
else
hole_size = p->start_paddr - (paddr + size);
if (!min_hole_size || hole_size < min_hole_size)
min_hole_size = hole_size;
}
if (min_hole_size) {
if (min_hole_size > size) {
printk(KERN_ERR "Too huge memory hole. Ignoring %ld MBytes at %lx\n",
size/(1024*1024), paddr);
return;
}
}
/* record this node in proximity bitmap */ /* record this node in proximity bitmap */
pxm_bit_set(pxm); pxm_bit_set(pxm);
...@@ -454,6 +426,12 @@ acpi_numa_arch_fixup (void) ...@@ -454,6 +426,12 @@ acpi_numa_arch_fixup (void)
{ {
int i, j, node_from, node_to; int i, j, node_from, node_to;
/* If there's no SRAT, fix the phys_id */
if (srat_num_cpus == 0) {
node_cpuid[0].phys_id = hard_smp_processor_id();
return;
}
/* calculate total number of nodes in system from PXM bitmap */ /* calculate total number of nodes in system from PXM bitmap */
numnodes = 0; /* init total nodes in system */ numnodes = 0; /* init total nodes in system */
...@@ -614,6 +592,12 @@ acpi_boot_init (void) ...@@ -614,6 +592,12 @@ acpi_boot_init (void)
smp_build_cpu_map(); smp_build_cpu_map();
# ifdef CONFIG_NUMA # ifdef CONFIG_NUMA
if (srat_num_cpus == 0) {
int cpu, i = 1;
for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
}
build_cpu_to_node_map(); build_cpu_to_node_map();
# endif # endif
#endif #endif
......
...@@ -265,15 +265,15 @@ cSaveCRs: ...@@ -265,15 +265,15 @@ cSaveCRs:
add r4=8,r2 // duplicate r2 in r4 add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2 // duplicate r2 in r4 add r6=2*8,r2 // duplicate r2 in r4
mov r3=cr0 // cr.dcr mov r3=cr.dcr
mov r5=cr1 // cr.itm mov r5=cr.itm
mov r7=cr2;; // cr.iva mov r7=cr.iva;;
st8 [r2]=r3,8*8 st8 [r2]=r3,8*8
st8 [r4]=r5,3*8 st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;; // 48 byte rements st8 [r6]=r7,3*8;; // 48 byte rements
mov r3=cr8;; // cr.pta mov r3=cr.pta;;
st8 [r2]=r3,8*8;; // 64 byte rements st8 [r2]=r3,8*8;; // 64 byte rements
// if PSR.ic=0, reading interruption registers causes an illegal operation fault // if PSR.ic=0, reading interruption registers causes an illegal operation fault
...@@ -286,23 +286,23 @@ begin_skip_intr_regs: ...@@ -286,23 +286,23 @@ begin_skip_intr_regs:
add r4=8,r2 // duplicate r2 in r4 add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2 // duplicate r2 in r6 add r6=2*8,r2 // duplicate r2 in r6
mov r3=cr16 // cr.ipsr mov r3=cr.ipsr
mov r5=cr17 // cr.isr mov r5=cr.isr
mov r7=r0;; // cr.ida => cr18 (reserved) mov r7=r0;;
st8 [r2]=r3,3*8 st8 [r2]=r3,3*8
st8 [r4]=r5,3*8 st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;; st8 [r6]=r7,3*8;;
mov r3=cr19 // cr.iip mov r3=cr.iip
mov r5=cr20 // cr.idtr mov r5=cr.ifa
mov r7=cr21;; // cr.iitr mov r7=cr.itir;;
st8 [r2]=r3,3*8 st8 [r2]=r3,3*8
st8 [r4]=r5,3*8 st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;; st8 [r6]=r7,3*8;;
mov r3=cr22 // cr.iipa mov r3=cr.iipa
mov r5=cr23 // cr.ifs mov r5=cr.ifs
mov r7=cr24;; // cr.iim mov r7=cr.iim;;
st8 [r2]=r3,3*8 st8 [r2]=r3,3*8
st8 [r4]=r5,3*8 st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;; st8 [r6]=r7,3*8;;
...@@ -311,104 +311,101 @@ begin_skip_intr_regs: ...@@ -311,104 +311,101 @@ begin_skip_intr_regs:
st8 [r2]=r3,160;; // 160 byte rement st8 [r2]=r3,160;; // 160 byte rement
SkipIntrRegs: SkipIntrRegs:
st8 [r2]=r0,168 // another 168 byte . st8 [r2]=r0,152;; // another 152 byte .
mov r3=cr66;; // cr.lid add r4=8,r2 // duplicate r2 in r4
st8 [r2]=r3,40 // 40 byte rement add r6=2*8,r2 // duplicate r2 in r6
mov r3=cr71;; // cr.ivr mov r3=cr.lid
st8 [r2]=r3,8 // mov r5=cr.ivr // cr.ivr, don't read it
mov r7=cr.tpr;;
mov r3=cr72;; // cr.tpr st8 [r2]=r3,3*8
st8 [r2]=r3,24 // 24 byte increment st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=r0;; // cr.eoi => cr75
st8 [r2]=r3,168 // 168 byte inc.
mov r3=r0;; // cr.irr0 => cr96
st8 [r2]=r3,16 // 16 byte inc.
mov r3=r0;; // cr.irr1 => cr98
st8 [r2]=r3,16 // 16 byte inc.
mov r3=r0;; // cr.irr2 => cr100 mov r3=r0 // cr.eoi => cr67
st8 [r2]=r3,16 // 16 byte inc mov r5=r0 // cr.irr0 => cr68
mov r7=r0;; // cr.irr1 => cr69
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=r0;; // cr.irr3 => cr100 mov r3=r0 // cr.irr2 => cr70
st8 [r2]=r3,16 // 16b inc. mov r5=r0 // cr.irr3 => cr71
mov r7=cr.itv;;
st8 [r2]=r3,3*8
st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;;
mov r3=r0;; // cr.itv => cr114 mov r3=cr.pmv
st8 [r2]=r3,16 // 16 byte inc. mov r5=cr.cmcv;;
st8 [r2]=r3,7*8
st8 [r4]=r5,7*8;;
mov r3=r0;; // cr.pmv => cr116 mov r3=r0 // cr.lrr0 => cr80
st8 [r2]=r3,8 mov r5=r0;; // cr.lrr1 => cr81
st8 [r2]=r3,23*8
st8 [r4]=r5,23*8;;
mov r3=r0;; // cr.lrr0 => cr117 adds r2=25*8,r2;;
st8 [r2]=r3,8
mov r3=r0;; // cr.lrr1 => cr118
st8 [r2]=r3,8
mov r3=r0;; // cr.cmcv => cr119
st8 [r2]=r3,8*10;;
cSaveARs: cSaveARs:
// save ARs // save ARs
add r4=8,r2 // duplicate r2 in r4 add r4=8,r2 // duplicate r2 in r4
add r6=2*8,r2 // duplicate r2 in r6 add r6=2*8,r2 // duplicate r2 in r6
mov r3=ar0 // ar.kro mov r3=ar.k0
mov r5=ar1 // ar.kr1 mov r5=ar.k1
mov r7=ar2;; // ar.kr2 mov r7=ar.k2;;
st8 [r2]=r3,3*8 st8 [r2]=r3,3*8
st8 [r4]=r5,3*8 st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;; st8 [r6]=r7,3*8;;
mov r3=ar3 // ar.kr3 mov r3=ar.k3
mov r5=ar4 // ar.kr4 mov r5=ar.k4
mov r7=ar5;; // ar.kr5 mov r7=ar.k5;;
st8 [r2]=r3,3*8 st8 [r2]=r3,3*8
st8 [r4]=r5,3*8 st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;; st8 [r6]=r7,3*8;;
mov r3=ar6 // ar.kr6 mov r3=ar.k6
mov r5=ar7 // ar.kr7 mov r5=ar.k7
mov r7=r0;; // ar.kr8 mov r7=r0;; // ar.kr8
st8 [r2]=r3,10*8 st8 [r2]=r3,10*8
st8 [r4]=r5,10*8 st8 [r4]=r5,10*8
st8 [r6]=r7,10*8;; // rement by 72 bytes st8 [r6]=r7,10*8;; // rement by 72 bytes
mov r3=ar16 // ar.rsc mov r3=ar.rsc
mov ar16=r0 // put RSE in enforced lazy mode mov ar.rsc=r0 // put RSE in enforced lazy mode
mov r5=ar17 // ar.bsp mov r5=ar.bsp
;; ;;
mov r7=ar18;; // ar.bspstore mov r7=ar.bspstore;;
st8 [r2]=r3,3*8 st8 [r2]=r3,3*8
st8 [r4]=r5,3*8 st8 [r4]=r5,3*8
st8 [r6]=r7,3*8;; st8 [r6]=r7,3*8;;
mov r3=ar19;; // ar.rnat mov r3=ar.rnat;;
st8 [r2]=r3,8*13 // increment by 13x8 bytes st8 [r2]=r3,8*13 // increment by 13x8 bytes
mov r3=ar32;; // ar.ccv mov r3=ar.ccv;;
st8 [r2]=r3,8*4 st8 [r2]=r3,8*4
mov r3=ar36;; // ar.unat mov r3=ar.unat;;
st8 [r2]=r3,8*4 st8 [r2]=r3,8*4
mov r3=ar40;; // ar.fpsr mov r3=ar.fpsr;;
st8 [r2]=r3,8*4 st8 [r2]=r3,8*4
mov r3=ar44;; // ar.itc mov r3=ar.itc;;
st8 [r2]=r3,160 // 160 st8 [r2]=r3,160 // 160
mov r3=ar64;; // ar.pfs mov r3=ar.pfs;;
st8 [r2]=r3,8 st8 [r2]=r3,8
mov r3=ar65;; // ar.lc mov r3=ar.lc;;
st8 [r2]=r3,8 st8 [r2]=r3,8
mov r3=ar66;; // ar.ec mov r3=ar.ec;;
st8 [r2]=r3 st8 [r2]=r3
add r2=8*62,r2 //padding add r2=8*62,r2 //padding
...@@ -417,7 +414,8 @@ cSaveARs: ...@@ -417,7 +414,8 @@ cSaveARs:
movl r4=0x00;; movl r4=0x00;;
cStRR: cStRR:
mov r3=rr[r4];; dep.z r5=r4,61,3;;
mov r3=rr[r5];;
st8 [r2]=r3,8 st8 [r2]=r3,8
add r4=1,r4 add r4=1,r4
br.cloop.sptk.few cStRR br.cloop.sptk.few cStRR
...@@ -501,12 +499,12 @@ restore_CRs: ...@@ -501,12 +499,12 @@ restore_CRs:
ld8 r3=[r2],8*8 ld8 r3=[r2],8*8
ld8 r5=[r4],3*8 ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;; // 48 byte increments ld8 r7=[r6],3*8;; // 48 byte increments
mov cr0=r3 // cr.dcr mov cr.dcr=r3
mov cr1=r5 // cr.itm mov cr.itm=r5
mov cr2=r7;; // cr.iva mov cr.iva=r7;;
ld8 r3=[r2],8*8;; // 64 byte increments ld8 r3=[r2],8*8;; // 64 byte increments
// mov cr8=r3 // cr.pta // mov cr.pta=r3
// if PSR.ic=1, reading interruption registers causes an illegal operation fault // if PSR.ic=1, reading interruption registers causes an illegal operation fault
...@@ -523,64 +521,66 @@ begin_rskip_intr_regs: ...@@ -523,64 +521,66 @@ begin_rskip_intr_regs:
ld8 r3=[r2],3*8 ld8 r3=[r2],3*8
ld8 r5=[r4],3*8 ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;; ld8 r7=[r6],3*8;;
mov cr16=r3 // cr.ipsr mov cr.ipsr=r3
mov cr17=r5 // cr.isr is read only // mov cr.isr=r5 // cr.isr is read only
// mov cr18=r7;; // cr.ida (reserved - don't restore)
ld8 r3=[r2],3*8 ld8 r3=[r2],3*8
ld8 r5=[r4],3*8 ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;; ld8 r7=[r6],3*8;;
mov cr19=r3 // cr.iip mov cr.iip=r3
mov cr20=r5 // cr.idtr mov cr.ifa=r5
mov cr21=r7;; // cr.iitr mov cr.itir=r7;;
ld8 r3=[r2],3*8 ld8 r3=[r2],3*8
ld8 r5=[r4],3*8 ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;; ld8 r7=[r6],3*8;;
mov cr22=r3 // cr.iipa mov cr.iipa=r3
mov cr23=r5 // cr.ifs mov cr.ifs=r5
mov cr24=r7 // cr.iim mov cr.iim=r7
ld8 r3=[r2],160;; // 160 byte increment ld8 r3=[r2],160;; // 160 byte increment
mov cr25=r3 // cr.iha mov cr.iha=r3
rSkipIntrRegs: rSkipIntrRegs:
ld8 r3=[r2],168;; // another 168 byte inc. ld8 r3=[r2],152;; // another 152 byte inc.
ld8 r3=[r2],40;; // 40 byte increment add r4=8,r2 // duplicate r2 in r4
mov cr66=r3 // cr.lid add r6=2*8,r2;; // duplicate r2 in r6
ld8 r3=[r2],8;; ld8 r3=[r2],8*3
// mov cr71=r3 // cr.ivr is read only ld8 r5=[r4],8*3
ld8 r3=[r2],24;; // 24 byte increment ld8 r7=[r6],8*3;;
mov cr72=r3 // cr.tpr mov cr.lid=r3
// mov cr.ivr=r5 // cr.ivr is read only
ld8 r3=[r2],168;; // 168 byte inc. mov cr.tpr=r7;;
// mov cr75=r3 // cr.eoi
ld8 r3=[r2],8*3
ld8 r3=[r2],16;; // 16 byte inc. ld8 r5=[r4],8*3
// mov cr96=r3 // cr.irr0 is read only ld8 r7=[r6],8*3;;
// mov cr.eoi=r3
ld8 r3=[r2],16;; // 16 byte inc. // mov cr.irr0=r5 // cr.irr0 is read only
// mov cr98=r3 // cr.irr1 is read only // mov cr.irr1=r7;; // cr.irr1 is read only
ld8 r3=[r2],16;; // 16 byte inc ld8 r3=[r2],8*3
// mov cr100=r3 // cr.irr2 is read only ld8 r5=[r4],8*3
ld8 r7=[r6],8*3;;
ld8 r3=[r2],16;; // 16b inc. // mov cr.irr2=r3 // cr.irr2 is read only
// mov cr102=r3 // cr.irr3 is read only // mov cr.irr3=r5 // cr.irr3 is read only
mov cr.itv=r7;;
ld8 r3=[r2],16;; // 16 byte inc.
// mov cr114=r3 // cr.itv ld8 r3=[r2],8*7
ld8 r5=[r4],8*7;;
ld8 r3=[r2],8;; mov cr.pmv=r3
// mov cr116=r3 // cr.pmv mov cr.cmcv=r5;;
ld8 r3=[r2],8;;
// mov cr117=r3 // cr.lrr0 ld8 r3=[r2],8*23
ld8 r3=[r2],8;; ld8 r5=[r4],8*23;;
// mov cr118=r3 // cr.lrr1 adds r2=8*23,r2
ld8 r3=[r2],8*10;; adds r4=8*23,r4;;
// mov cr119=r3 // cr.cmcv // mov cr.lrr0=r3
// mov cr.lrr1=r5
adds r2=8*2,r2;;
restore_ARs: restore_ARs:
add r4=8,r2 // duplicate r2 in r4 add r4=8,r2 // duplicate r2 in r4
...@@ -589,67 +589,67 @@ restore_ARs: ...@@ -589,67 +589,67 @@ restore_ARs:
ld8 r3=[r2],3*8 ld8 r3=[r2],3*8
ld8 r5=[r4],3*8 ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;; ld8 r7=[r6],3*8;;
mov ar0=r3 // ar.kro mov ar.k0=r3
mov ar1=r5 // ar.kr1 mov ar.k1=r5
mov ar2=r7;; // ar.kr2 mov ar.k2=r7;;
ld8 r3=[r2],3*8 ld8 r3=[r2],3*8
ld8 r5=[r4],3*8 ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;; ld8 r7=[r6],3*8;;
mov ar3=r3 // ar.kr3 mov ar.k3=r3
mov ar4=r5 // ar.kr4 mov ar.k4=r5
mov ar5=r7;; // ar.kr5 mov ar.k5=r7;;
ld8 r3=[r2],10*8 ld8 r3=[r2],10*8
ld8 r5=[r4],10*8 ld8 r5=[r4],10*8
ld8 r7=[r6],10*8;; ld8 r7=[r6],10*8;;
mov ar6=r3 // ar.kr6 mov ar.k6=r3
mov ar7=r5 // ar.kr7 mov ar.k7=r5
// mov ar8=r6 // ar.kr8
;; ;;
ld8 r3=[r2],3*8 ld8 r3=[r2],3*8
ld8 r5=[r4],3*8 ld8 r5=[r4],3*8
ld8 r7=[r6],3*8;; ld8 r7=[r6],3*8;;
// mov ar16=r3 // ar.rsc // mov ar.rsc=r3
// mov ar17=r5 // ar.bsp is read only // mov ar.bsp=r5 // ar.bsp is read only
mov ar16=r0 // make sure that RSE is in enforced lazy mode mov ar.rsc=r0 // make sure that RSE is in enforced lazy mode
;; ;;
mov ar18=r7;; // ar.bspstore mov ar.bspstore=r7;;
ld8 r9=[r2],8*13;; ld8 r9=[r2],8*13;;
mov ar19=r9 // ar.rnat mov ar.rnat=r9
mov ar16=r3 // ar.rsc mov ar.rsc=r3
ld8 r3=[r2],8*4;; ld8 r3=[r2],8*4;;
mov ar32=r3 // ar.ccv mov ar.ccv=r3
ld8 r3=[r2],8*4;; ld8 r3=[r2],8*4;;
mov ar36=r3 // ar.unat mov ar.unat=r3
ld8 r3=[r2],8*4;; ld8 r3=[r2],8*4;;
mov ar40=r3 // ar.fpsr mov ar.fpsr=r3
ld8 r3=[r2],160;; // 160 ld8 r3=[r2],160;; // 160
// mov ar44=r3 // ar.itc // mov ar.itc=r3
ld8 r3=[r2],8;; ld8 r3=[r2],8;;
mov ar64=r3 // ar.pfs mov ar.pfs=r3
ld8 r3=[r2],8;; ld8 r3=[r2],8;;
mov ar65=r3 // ar.lc mov ar.lc=r3
ld8 r3=[r2];; ld8 r3=[r2];;
mov ar66=r3 // ar.ec mov ar.ec=r3
add r2=8*62,r2;; // padding add r2=8*62,r2;; // padding
restore_RRs: restore_RRs:
mov r5=ar.lc mov r5=ar.lc
mov ar.lc=0x08-1 mov ar.lc=0x08-1
movl r4=0x00 movl r4=0x00;;
cStRRr: cStRRr:
dep.z r7=r4,61,3
ld8 r3=[r2],8;; ld8 r3=[r2],8;;
// mov rr[r4]=r3 // what are its access previledges? mov rr[r7]=r3 // what are its access previledges?
add r4=1,r4 add r4=1,r4
br.cloop.sptk.few cStRRr br.cloop.sptk.few cStRRr
;; ;;
......
...@@ -101,7 +101,7 @@ int ...@@ -101,7 +101,7 @@ int
filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
{ {
unsigned long range_start, range_end, prev_start; unsigned long range_start, range_end, prev_start;
void (*func)(unsigned long, unsigned long); void (*func)(unsigned long, unsigned long, int);
int i; int i;
#if IGNORE_PFN0 #if IGNORE_PFN0
...@@ -122,11 +122,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) ...@@ -122,11 +122,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
range_end = min(end, rsvd_region[i].start); range_end = min(end, rsvd_region[i].start);
if (range_start < range_end) if (range_start < range_end)
#ifdef CONFIG_DISCONTIGMEM call_pernode_memory(__pa(range_start), range_end - range_start, func);
call_pernode_memory(__pa(range_start), __pa(range_end), func);
#else
(*func)(__pa(range_start), range_end - range_start);
#endif
/* nothing more available in this segment */ /* nothing more available in this segment */
if (range_end == end) return 0; if (range_end == end) return 0;
...@@ -239,7 +235,6 @@ setup_arch (char **cmdline_p) ...@@ -239,7 +235,6 @@ setup_arch (char **cmdline_p)
strlcpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); strlcpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
efi_init(); efi_init();
find_memory();
#ifdef CONFIG_ACPI_BOOT #ifdef CONFIG_ACPI_BOOT
/* Initialize the ACPI boot-time table parser */ /* Initialize the ACPI boot-time table parser */
...@@ -253,6 +248,8 @@ setup_arch (char **cmdline_p) ...@@ -253,6 +248,8 @@ setup_arch (char **cmdline_p)
# endif # endif
#endif /* CONFIG_APCI_BOOT */ #endif /* CONFIG_APCI_BOOT */
find_memory();
/* process SAL system table: */ /* process SAL system table: */
ia64_sal_init(efi.sal_systab); ia64_sal_init(efi.sal_systab);
...@@ -544,28 +541,7 @@ cpu_init (void) ...@@ -544,28 +541,7 @@ cpu_init (void)
struct cpuinfo_ia64 *cpu_info; struct cpuinfo_ia64 *cpu_info;
void *cpu_data; void *cpu_data;
#ifdef CONFIG_SMP cpu_data = per_cpu_init();
int cpu;
/*
* get_free_pages() cannot be used before cpu_init() done. BSP allocates
* "NR_CPUS" pages for all CPUs to avoid that AP calls get_zeroed_page().
*/
if (smp_processor_id() == 0) {
cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, PERCPU_PAGE_SIZE,
__pa(MAX_DMA_ADDRESS));
for (cpu = 0; cpu < NR_CPUS; cpu++) {
memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
cpu_data += PERCPU_PAGE_SIZE;
per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
}
}
cpu_data = __per_cpu_start + __per_cpu_offset[smp_processor_id()];
#else /* !CONFIG_SMP */
cpu_data = __phys_per_cpu_start;
#endif /* !CONFIG_SMP */
get_max_cacheline_size(); get_max_cacheline_size();
...@@ -576,9 +552,6 @@ cpu_init (void) ...@@ -576,9 +552,6 @@ cpu_init (void)
* accessing cpu_data() through the canonical per-CPU address. * accessing cpu_data() through the canonical per-CPU address.
*/ */
cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start); cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
#ifdef CONFIG_NUMA
cpu_info->node_data = get_node_data_ptr();
#endif
identify_cpu(cpu_info); identify_cpu(cpu_info);
#ifdef CONFIG_MCKINLEY #ifdef CONFIG_MCKINLEY
......
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/sections.h> #include <asm/sections.h>
#ifdef CONFIG_VIRTUAL_MEM_MAP
static unsigned long num_dma_physpages;
#endif
/** /**
* show_mem - display a memory statistics summary * show_mem - display a memory statistics summary
* *
...@@ -161,3 +165,133 @@ find_memory (void) ...@@ -161,3 +165,133 @@ find_memory (void)
find_initrd(); find_initrd();
} }
#ifdef CONFIG_SMP
/**
* per_cpu_init - setup per-cpu variables
*
* Allocate and setup per-cpu data areas.
*/
void *
per_cpu_init (void)
{
void *cpu_data;
int cpu;
/*
* get_free_pages() cannot be used before cpu_init() done. BSP
* allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
* get_zeroed_page().
*/
if (smp_processor_id() == 0) {
cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
for (cpu = 0; cpu < NR_CPUS; cpu++) {
memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
cpu_data += PERCPU_PAGE_SIZE;
per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
}
}
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
#endif /* CONFIG_SMP */
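per_cpu_init() above gives each CPU a private copy of the per-cpu data section and records its distance from the original in __per_cpu_offset[]; a per-cpu variable is then reached by adding that offset to the variable's link-time address. A standalone user-space sketch of the same offset trick, with all names made up:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS 4

/* Stand-ins for the linker-provided per-cpu section and one variable in it. */
static char percpu_section[256];
static int *counter = (int *)&percpu_section[16];

int main(void)
{
	long per_cpu_offset[NR_CPUS];	/* plays the role of __per_cpu_offset[] */
	char *area = malloc(NR_CPUS * sizeof(percpu_section));
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		char *copy = area + cpu * sizeof(percpu_section);

		/* Give this CPU its own copy of the section and remember how
		 * far the copy sits from the original.  The kernel does the
		 * same pointer subtraction; it is not strict ISO C, but it
		 * works on a flat address space. */
		memcpy(copy, percpu_section, sizeof(percpu_section));
		per_cpu_offset[cpu] = copy - percpu_section;
	}

	/* Reaching "CPU 2's counter": link-time address plus CPU 2's offset. */
	int *cpu2_counter = (int *)((char *)counter + per_cpu_offset[2]);
	*cpu2_counter = 42;
	printf("cpu 2 copy of counter = %d\n", *cpu2_counter);

	free(area);
	return 0;
}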
static int
count_pages (u64 start, u64 end, void *arg)
{
unsigned long *count = arg;
*count += (end - start) >> PAGE_SHIFT;
return 0;
}
#ifdef CONFIG_VIRTUAL_MEM_MAP
static int
count_dma_pages (u64 start, u64 end, void *arg)
{
unsigned long *count = arg;
if (end <= MAX_DMA_ADDRESS)
*count += (end - start) >> PAGE_SHIFT;
return 0;
}
#endif
/*
* Set up the page tables.
*/
void
paging_init (void)
{
unsigned long max_dma;
unsigned long zones_size[MAX_NR_ZONES];
#ifdef CONFIG_VIRTUAL_MEM_MAP
unsigned long zholes_size[MAX_NR_ZONES];
unsigned long max_gap;
#endif
/* initialize mem_map[] */
memset(zones_size, 0, sizeof(zones_size));
num_physpages = 0;
efi_memmap_walk(count_pages, &num_physpages);
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
#ifdef CONFIG_VIRTUAL_MEM_MAP
memset(zholes_size, 0, sizeof(zholes_size));
num_dma_physpages = 0;
efi_memmap_walk(count_dma_pages, &num_dma_physpages);
if (max_low_pfn < max_dma) {
zones_size[ZONE_DMA] = max_low_pfn;
zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
} else {
zones_size[ZONE_DMA] = max_dma;
zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
if (num_physpages > num_dma_physpages) {
zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
zholes_size[ZONE_NORMAL] =
((max_low_pfn - max_dma) -
(num_physpages - num_dma_physpages));
}
}
max_gap = 0;
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;
free_area_init_node(0, &contig_page_data, NULL, zones_size, 0,
zholes_size);
mem_map = contig_page_data.node_mem_map;
} else {
unsigned long map_size;
/* allocate virtual_mem_map */
map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page));
vmalloc_end -= map_size;
vmem_map = (struct page *) vmalloc_end;
efi_memmap_walk(create_mem_map_page_table, 0);
free_area_init_node(0, &contig_page_data, vmem_map, zones_size,
0, zholes_size);
mem_map = contig_page_data.node_mem_map;
printk("Virtual mem_map starts at 0x%p\n", mem_map);
}
#else /* !CONFIG_VIRTUAL_MEM_MAP */
if (max_low_pfn < max_dma)
zones_size[ZONE_DMA] = max_low_pfn;
else {
zones_size[ZONE_DMA] = max_dma;
zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
}
free_area_init(zones_size);
#endif /* !CONFIG_VIRTUAL_MEM_MAP */
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
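paging_init() above only switches to the virtual mem_map when the largest hole reported by efi_memmap_walk() exceeds LARGE_GAP (1 GB, defined in init.c further down); otherwise the flat mem_map covers the hole directly. A back-of-the-envelope sketch of what the virtual map costs in vmalloc space; the 16KB page size and 64-byte struct page are assumptions:

#include <stdio.h>

#define PAGE_SHIFT	14			/* assumed: 16KB pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define LARGE_GAP	0x40000000UL		/* 1GB threshold from init.c */

int main(void)
{
	/* Hypothetical box: 2GB of RAM at 0 and 2GB more just below 64GB. */
	unsigned long max_low_pfn = (64UL << 30) >> PAGE_SHIFT;
	unsigned long largest_hole = 60UL << 30;
	unsigned long struct_page = 64;		/* assumed sizeof(struct page) */
	unsigned long map_size = PAGE_ALIGN(max_low_pfn * struct_page);

	if (largest_hole < LARGE_GAP)
		printf("flat mem_map would cover the hole directly\n");
	else
		printf("virtual mem_map: %lu MB of vmalloc space, populated only "
		       "where memory actually exists\n", map_size >> 20);
	return 0;
}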
...@@ -17,72 +17,57 @@ ...@@ -17,72 +17,57 @@
#include <linux/acpi.h> #include <linux/acpi.h>
#include <linux/efi.h> #include <linux/efi.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/meminit.h> #include <asm/meminit.h>
#include <asm/numa.h>
#include <asm/sections.h>
/*
* Round an address upward to the next multiple of GRANULE size.
*/
#define GRANULEROUNDUP(n) (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1))
static struct ia64_node_data *node_data[MAX_NUMNODES];
static long boot_pg_data[8*MAX_NUMNODES+sizeof(pg_data_t)] __initdata;
static pg_data_t *pg_data_ptr[MAX_NUMNODES] __initdata;
static bootmem_data_t bdata[MAX_NUMNODES][NR_BANKS_PER_NODE+1] __initdata;
/* /*
* Return the compact node number of this cpu. Used prior to * Track per-node information needed to setup the boot memory allocator, the
* setting up the cpu_data area. * per-node areas, and the real VM.
* Note - not fast, intended for boot use only!!
*/ */
int struct early_node_data {
boot_get_local_nodeid(void) struct ia64_node_data *node_data;
{ pg_data_t *pgdat;
int i; unsigned long pernode_addr;
unsigned long pernode_size;
for (i = 0; i < NR_CPUS; i++) struct bootmem_data bootmem_data;
if (node_cpuid[i].phys_id == hard_smp_processor_id()) unsigned long num_physpages;
return node_cpuid[i].nid; unsigned long num_dma_physpages;
unsigned long min_pfn;
/* node info missing, so nid should be 0.. */ unsigned long max_pfn;
return 0; };
}
/*
* Return a pointer to the pg_data structure for a node.
* This function is used ONLY in early boot before the cpu_data
* structure is available.
*/
pg_data_t* __init
boot_get_pg_data_ptr(long node)
{
return pg_data_ptr[node];
}
static struct early_node_data mem_data[NR_NODES] __initdata;
/* /*
* Return a pointer to the node data for the current node. * To prevent cache aliasing effects, align per-node structures so that they
* (boottime initialization only) * start at addresses that are strided by node number.
*/ */
struct ia64_node_data * #define NODEDATA_ALIGN(addr, node) \
get_node_data_ptr(void) ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + (node)*PERCPU_PAGE_SIZE)
{
return node_data[boot_get_local_nodeid()]; /**
} * build_node_maps - callback to setup bootmem structs for each node
* @start: physical start of range
/* * @len: length of range
* We allocate one of the bootmem_data_t structs for each piece of memory * @node: node where this range resides
* that we wish to treat as a contiguous block. Each such block must start *
* on a BANKSIZE boundary. Multiple banks per node is not supported. * We allocate a struct bootmem_data for each piece of memory that we wish to
* treat as a virtually contiguous block (i.e. each node). Each such block
* must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
* if necessary. Any non-existent pages will simply be part of the virtual
* memmap. We also update min_low_pfn and max_low_pfn here as we receive
* memory ranges from the caller.
*/ */
static int __init static int __init build_node_maps(unsigned long start, unsigned long len,
build_maps(unsigned long pstart, unsigned long length, int node) int node)
{ {
bootmem_data_t *bdp; unsigned long cstart, epfn, end = start + len;
unsigned long cstart, epfn; struct bootmem_data *bdp = &mem_data[node].bootmem_data;
bdp = pg_data_ptr[node]->bdata; epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
epfn = GRANULEROUNDUP(pstart + length) >> PAGE_SHIFT; cstart = GRANULEROUNDDOWN(start);
cstart = pstart & ~(BANKSIZE - 1);
if (!bdp->node_low_pfn) { if (!bdp->node_low_pfn) {
bdp->node_boot_start = cstart; bdp->node_boot_start = cstart;
...@@ -98,34 +83,143 @@ build_maps(unsigned long pstart, unsigned long length, int node) ...@@ -98,34 +83,143 @@ build_maps(unsigned long pstart, unsigned long length, int node)
return 0; return 0;
} }
/* /**
* Find space on each node for the bootmem map. * early_nr_cpus_node - return number of cpus on a given node
* @node: node to check
*
* Count the number of cpus on @node. We can't use nr_cpus_node() yet because
* acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
* called yet.
*/
static int early_nr_cpus_node(int node)
{
int cpu, n = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++)
if (node == node_cpuid[cpu].nid)
n++;
return n;
}
/**
* find_pernode_space - allocate memory for memory map and per-node structures
* @start: physical start of range
* @len: length of range
* @node: node where this range resides
*
* This routine reserves space for the per-cpu data struct, the list of
* pg_data_ts and the per-node data struct. Each node will have something like
* the following in the first chunk of addr. space large enough to hold it.
*
* ________________________
* | |
* |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
* | PERCPU_PAGE_SIZE * | start and length big enough
* | NR_CPUS |
* |------------------------|
* | local pg_data_t * |
* |------------------------|
* | local ia64_node_data |
* |------------------------|
* | ??? |
* |________________________|
* *
* Called by efi_memmap_walk to find boot memory on each node. Note that * Once this space has been set aside, the bootmem maps are initialized. We
* only blocks that are free are passed to this routine (currently filtered by * could probably move the allocation of the per-cpu and ia64_node_data space
* free_available_memory). * outside of this function and use alloc_bootmem_node(), but doing it here
* is straightforward and we get the alignments we want so...
*/ */
static int __init static int __init find_pernode_space(unsigned long start, unsigned long len,
find_bootmap_space(unsigned long pstart, unsigned long length, int node) int node)
{ {
unsigned long mapsize, pages, epfn; unsigned long epfn, cpu, cpus;
bootmem_data_t *bdp; unsigned long pernodesize = 0, pernode;
void *cpu_data;
struct bootmem_data *bdp = &mem_data[node].bootmem_data;
epfn = (pstart + length) >> PAGE_SHIFT; epfn = (start + len) >> PAGE_SHIFT;
bdp = &pg_data_ptr[node]->bdata[0];
if (pstart < bdp->node_boot_start || epfn > bdp->node_low_pfn) /*
* Make sure this memory falls within this node's usable memory
* since we may have thrown some away in build_maps().
*/
if (start < bdp->node_boot_start ||
epfn > bdp->node_low_pfn)
return 0; return 0;
if (!bdp->node_bootmem_map) { /* Don't setup this node's local space twice... */
pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT); if (!mem_data[node].pernode_addr) {
/*
* Calculate total size needed, incl. what's necessary
* for good alignment and alias prevention.
*/
cpus = early_nr_cpus_node(node);
pernodesize += PERCPU_PAGE_SIZE * cpus;
pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
pernodesize = PAGE_ALIGN(pernodesize);
pernode = NODEDATA_ALIGN(start, node);
/* Is this range big enough for what we want to store here? */
if (start + len > (pernode + pernodesize)) {
mem_data[node].pernode_addr = pernode;
mem_data[node].pernode_size = pernodesize;
memset(__va(pernode), 0, pernodesize);
cpu_data = (void *)pernode;
pernode += PERCPU_PAGE_SIZE * cpus;
mem_data[node].pgdat = __va(pernode);
pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
mem_data[node].node_data = __va(pernode);
pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
mem_data[node].pgdat->bdata = bdp;
pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
/*
* Copy the static per-cpu data into the region we
* just set aside and then setup __per_cpu_offset
* for each CPU on this node.
*/
for (cpu = 0; cpu < NR_CPUS; cpu++) {
if (node == node_cpuid[cpu].nid) {
memcpy(cpu_data, __phys_per_cpu_start,
__per_cpu_end-__per_cpu_start);
__per_cpu_offset[cpu] =
(char*)__va(cpu_data) -
__per_cpu_start;
cpu_data += PERCPU_PAGE_SIZE;
}
}
}
}
pernode = mem_data[node].pernode_addr;
pernodesize = mem_data[node].pernode_size;
if (pernode && !bdp->node_bootmem_map) {
unsigned long pages, mapsize, map = 0;
pages = bdp->node_low_pfn -
(bdp->node_boot_start >> PAGE_SHIFT);
mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT; mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
if (length > mapsize) {
init_bootmem_node( /*
BOOT_NODE_DATA(node), * The map will either contain the pernode area or begin
pstart>>PAGE_SHIFT, * after it.
bdp->node_boot_start>>PAGE_SHIFT, */
bdp->node_low_pfn); if (pernode - start > mapsize)
map = start;
else if (start + len - pernode - pernodesize > mapsize)
map = pernode + pernodesize;
if (map) {
init_bootmem_node(mem_data[node].pgdat,
map>>PAGE_SHIFT,
bdp->node_boot_start>>PAGE_SHIFT,
bdp->node_low_pfn);
} }
} }
...@@ -133,85 +227,93 @@ find_bootmap_space(unsigned long pstart, unsigned long length, int node) ...@@ -133,85 +227,93 @@ find_bootmap_space(unsigned long pstart, unsigned long length, int node)
return 0; return 0;
} }
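find_pernode_space() above places each node's local area at NODEDATA_ALIGN(start, node): round up to the next 1MB boundary, then stride by node number so per-node structures on different nodes do not all start at the same cache color. A worked example of that macro (standalone check; the 64KB value for PERCPU_PAGE_SIZE is an assumption):

#include <assert.h>

#define PERCPU_PAGE_SIZE	0x10000UL	/* assumed 64KB */
#define NODEDATA_ALIGN(addr, node)					\
	((((addr) + 1024*1024-1) & ~(1024*1024-1)) + (node)*PERCPU_PAGE_SIZE)

int main(void)
{
	/* Node 0: plain round-up to the next 1MB boundary. */
	assert(NODEDATA_ALIGN(0x40008000UL, 0) == 0x40100000UL);
	/* Node 2: same round-up, then offset by 2 * 64KB so this node's
	 * area starts at a different color than node 0's. */
	assert(NODEDATA_ALIGN(0x40008000UL, 2) == 0x40120000UL);
	return 0;
}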
/**
/* * free_node_bootmem - free bootmem allocator memory for use
* Free available memory to the bootmem allocator. * @start: physical start of range
* * @len: length of range
* Note that only blocks that are free are passed to this routine (currently * @node: node where this range resides
* filtered by free_available_memory).
* *
* Simply calls the bootmem allocator to free the specified ranged from
* the given pg_data_t's bdata struct. After this function has been called
* for all the entries in the EFI memory map, the bootmem allocator will
* be ready to service allocation requests.
*/ */
static int __init static int __init free_node_bootmem(unsigned long start, unsigned long len,
discontig_free_bootmem_node(unsigned long pstart, unsigned long length, int node) int node)
{ {
free_bootmem_node(BOOT_NODE_DATA(node), pstart, length); free_bootmem_node(mem_data[node].pgdat, start, len);
return 0; return 0;
} }
/**
/* * reserve_pernode_space - reserve memory for per-node space
* Reserve the space used by the bootmem maps. *
* Reserve the space used by the bootmem maps & per-node space in the boot
* allocator so that when we actually create the real mem maps we don't
* use their memory.
*/ */
static void __init static void __init reserve_pernode_space(void)
discontig_reserve_bootmem(void)
{ {
int node; unsigned long base, size, pages;
unsigned long mapbase, mapsize, pages; struct bootmem_data *bdp;
bootmem_data_t *bdp; int node;
for (node = 0; node < numnodes; node++) { for (node = 0; node < numnodes; node++) {
bdp = BOOT_NODE_DATA(node)->bdata; pg_data_t *pdp = mem_data[node].pgdat;
bdp = pdp->bdata;
/* First the bootmem_map itself */
pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT); pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT; size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
mapbase = __pa(bdp->node_bootmem_map); base = __pa(bdp->node_bootmem_map);
reserve_bootmem_node(BOOT_NODE_DATA(node), mapbase, mapsize); reserve_bootmem_node(pdp, base, size);
/* Now the per-node space */
size = mem_data[node].pernode_size;
base = __pa(mem_data[node].pernode_addr);
reserve_bootmem_node(pdp, base, size);
} }
} }
/* /**
* Allocate per node tables. * initialize_pernode_data - fixup per-cpu & per-node pointers
* - the pg_data structure is allocated on each node. This minimizes offnode
* memory references
* - the node data is allocated & initialized. Portions of this structure is read-only (after
* boot) and contains node-local pointers to usefuls data structures located on
* other nodes.
* *
* We also switch to using the "real" pg_data structures at this point. Earlier in boot, we * Each node's per-node area has a copy of the global pg_data_t list, so
* use a different structure. The only use for pg_data prior to the point in boot is to get * we copy that to each node here, as well as setting the per-cpu pointer
* the pointer to the bdata for the node. * to the local node data structure. The active_cpus field of the per-node
* structure gets setup by the platform_cpu_init() function later.
*/ */
static void __init static void __init initialize_pernode_data(void)
allocate_pernode_structures(void)
{ {
pg_data_t *pgdat=0, *new_pgdat_list=0; int cpu, node;
int node, mynode; pg_data_t *pgdat_list[NR_NODES];
mynode = boot_get_local_nodeid(); for (node = 0; node < numnodes; node++)
for (node = numnodes - 1; node >= 0 ; node--) { pgdat_list[node] = mem_data[node].pgdat;
node_data[node] = alloc_bootmem_node(BOOT_NODE_DATA(node), sizeof (struct ia64_node_data));
pgdat = __alloc_bootmem_node(BOOT_NODE_DATA(node), sizeof(pg_data_t), SMP_CACHE_BYTES, 0);
pgdat->bdata = &(bdata[node][0]);
pg_data_ptr[node] = pgdat;
pgdat->pgdat_next = new_pgdat_list;
new_pgdat_list = pgdat;
}
memcpy(node_data[mynode]->pg_data_ptrs, pg_data_ptr, sizeof(pg_data_ptr)); /* Copy the pg_data_t list to each node and init the node field */
memcpy(node_data[mynode]->node_data_ptrs, node_data, sizeof(node_data)); for (node = 0; node < numnodes; node++) {
memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
sizeof(pgdat_list));
}
pgdat_list = new_pgdat_list; /* Set the node_data pointer for each per-cpu struct */
for (cpu = 0; cpu < NR_CPUS; cpu++) {
node = node_cpuid[cpu].nid;
per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
}
} }
/* /**
* Called early in boot to setup the boot memory allocator, and to * find_memory - walk the EFI memory map and setup the bootmem allocator
* allocate the node-local pg_data & node-directory data structures.. *
* Called early in boot to setup the bootmem allocator, and to
* allocate the per-cpu and per-node structures.
*/ */
void __init find_memory(void) void __init find_memory(void)
{ {
int node;
reserve_memory(); reserve_memory();
if (numnodes == 0) { if (numnodes == 0) {
...@@ -219,94 +321,48 @@ void __init find_memory(void) ...@@ -219,94 +321,48 @@ void __init find_memory(void)
numnodes = 1; numnodes = 1;
} }
for (node = 0; node < numnodes; node++) {
pg_data_ptr[node] = (pg_data_t*) &boot_pg_data[node];
pg_data_ptr[node]->bdata = &bdata[node][0];
}
min_low_pfn = -1; min_low_pfn = -1;
max_low_pfn = 0; max_low_pfn = 0;
efi_memmap_walk(filter_rsvd_memory, build_maps); /* These actually end up getting called by call_pernode_memory() */
efi_memmap_walk(filter_rsvd_memory, find_bootmap_space); efi_memmap_walk(filter_rsvd_memory, build_node_maps);
efi_memmap_walk(filter_rsvd_memory, discontig_free_bootmem_node); efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
discontig_reserve_bootmem(); efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
allocate_pernode_structures();
reserve_pernode_space();
initialize_pernode_data();
max_pfn = max_low_pfn;
find_initrd(); find_initrd();
} }
/* /**
* Initialize the paging system. * per_cpu_init - setup per-cpu variables
* - determine sizes of each node *
* - initialize the paging system for the node * find_pernode_space() does most of this already, we just need to set
* - build the nodedir for the node. This contains pointers to * local_per_cpu_offset
* the per-bank mem_map entries.
* - fix the page struct "virtual" pointers. These are bank specific
* values that the paging system doesn't understand.
* - replicate the nodedir structure to other nodes
*/ */
void *per_cpu_init(void)
void __init
discontig_paging_init(void)
{ {
int node, mynode; int cpu;
unsigned long max_dma, zones_size[MAX_NR_ZONES];
unsigned long kaddr, ekaddr, bid;
struct page *page;
bootmem_data_t *bdp;
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
mynode = boot_get_local_nodeid();
for (node = 0; node < numnodes; node++) {
long pfn, startpfn;
memset(zones_size, 0, sizeof(zones_size));
startpfn = -1; if (smp_processor_id() == 0) {
bdp = BOOT_NODE_DATA(node)->bdata; for (cpu = 0; cpu < NR_CPUS; cpu++) {
pfn = bdp->node_boot_start >> PAGE_SHIFT; per_cpu(local_per_cpu_offset, cpu) =
if (startpfn == -1) __per_cpu_offset[cpu];
startpfn = pfn;
if (pfn > max_dma)
zones_size[ZONE_NORMAL] += (bdp->node_low_pfn - pfn);
else if (bdp->node_low_pfn < max_dma)
zones_size[ZONE_DMA] += (bdp->node_low_pfn - pfn);
else {
zones_size[ZONE_DMA] += (max_dma - pfn);
zones_size[ZONE_NORMAL] += (bdp->node_low_pfn - max_dma);
}
free_area_init_node(node, NODE_DATA(node), NULL, zones_size, startpfn, 0);
page = NODE_DATA(node)->node_mem_map;
bdp = BOOT_NODE_DATA(node)->bdata;
kaddr = (unsigned long)__va(bdp->node_boot_start);
ekaddr = (unsigned long)__va(bdp->node_low_pfn << PAGE_SHIFT);
while (kaddr < ekaddr) {
if (paddr_to_nid(__pa(kaddr)) == node) {
bid = BANK_MEM_MAP_INDEX(kaddr);
node_data[mynode]->node_id_map[bid] = node;
node_data[mynode]->bank_mem_map_base[bid] = page;
}
kaddr += BANKSIZE;
page += BANKSIZE/PAGE_SIZE;
} }
} }
/* return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
* Finish setting up the node data for this node, then copy it to the other nodes.
*/
for (node=0; node < numnodes; node++)
if (mynode != node) {
memcpy(node_data[node], node_data[mynode], sizeof(struct ia64_node_data));
node_data[node]->node = node;
}
} }
/**
* show_mem - give short summary of memory stats
*
* Shows a simple page count of reserved and used pages in the system.
* For discontig machines, it does this on a per-pgdat basis.
*/
void show_mem(void) void show_mem(void)
{ {
int i, reserved = 0; int i, reserved = 0;
...@@ -335,7 +391,12 @@ void show_mem(void) ...@@ -335,7 +391,12 @@ void show_mem(void)
printk("%d free buffer pages\n", nr_free_buffer_pages()); printk("%d free buffer pages\n", nr_free_buffer_pages());
} }
/* /**
* call_pernode_memory - use SRAT to call callback functions with node info
* @start: physical start of range
* @len: length of range
* @arg: function to call for each range
*
* efi_memmap_walk() knows nothing about layout of memory across nodes. Find * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
* out to which node a block of memory belongs. Ignore memory that we cannot * out to which node a block of memory belongs. Ignore memory that we cannot
* identify, and split blocks that run across multiple nodes. * identify, and split blocks that run across multiple nodes.
...@@ -343,10 +404,10 @@ void show_mem(void) ...@@ -343,10 +404,10 @@ void show_mem(void)
* Take this opportunity to round the start address up and the end address * Take this opportunity to round the start address up and the end address
* down to page boundaries. * down to page boundaries.
*/ */
void call_pernode_memory(unsigned long start, unsigned long end, void *arg) void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
{ {
unsigned long rs, re; unsigned long rs, re, end = start + len;
void (*func)(unsigned long, unsigned long, int, int); void (*func)(unsigned long, unsigned long, int);
int i; int i;
start = PAGE_ALIGN(start); start = PAGE_ALIGN(start);
...@@ -357,21 +418,127 @@ void call_pernode_memory(unsigned long start, unsigned long end, void *arg) ...@@ -357,21 +418,127 @@ void call_pernode_memory(unsigned long start, unsigned long end, void *arg)
func = arg; func = arg;
if (!num_memblks) { if (!num_memblks) {
/* /* No SRAT table, to assume one node (node 0) */
* This machine doesn't have SRAT, so call func with
* nid=0, bank=0.
*/
if (start < end) if (start < end)
(*func)(start, end - start, 0, 0); (*func)(start, len, 0);
return; return;
} }
for (i = 0; i < num_memblks; i++) { for (i = 0; i < num_memblks; i++) {
rs = max(start, node_memblk[i].start_paddr); rs = max(start, node_memblk[i].start_paddr);
re = min(end, node_memblk[i].start_paddr+node_memblk[i].size); re = min(end, node_memblk[i].start_paddr +
node_memblk[i].size);
if (rs < re) if (rs < re)
(*func)(rs, re-rs, node_memblk[i].nid, (*func)(rs, re - rs, node_memblk[i].nid);
node_memblk[i].bank);
if (re == end)
break;
} }
} }
/**
* count_node_pages - callback to build per-node memory info structures
* @start: physical start of range
* @len: length of range
* @node: node where this range resides
*
* Each node has it's own number of physical pages, DMAable pages, start, and
* end page frame number. This routine will be called by call_pernode_memory()
* for each piece of usable memory and will setup these values for each node.
* Very similar to build_maps().
*/
static int count_node_pages(unsigned long start, unsigned long len, int node)
{
unsigned long end = start + len;
mem_data[node].num_physpages += len >> PAGE_SHIFT;
if (start <= __pa(MAX_DMA_ADDRESS))
mem_data[node].num_dma_physpages +=
(min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
start = GRANULEROUNDDOWN(start);
start = ORDERROUNDDOWN(start);
end = GRANULEROUNDUP(end);
mem_data[node].max_pfn = max(mem_data[node].max_pfn,
end >> PAGE_SHIFT);
mem_data[node].min_pfn = min(mem_data[node].min_pfn,
start >> PAGE_SHIFT);
return 0;
}
/**
* paging_init - setup page tables
*
* paging_init() sets up the page tables for each node of the system and frees
* the bootmem allocator memory for general use.
*/
void paging_init(void)
{
unsigned long max_dma;
unsigned long zones_size[MAX_NR_ZONES];
unsigned long zholes_size[MAX_NR_ZONES];
unsigned long max_gap, pfn_offset = 0;
int node;
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
max_gap = 0;
efi_memmap_walk(find_largest_hole, &max_gap);
/* so min() will work in count_node_pages */
for (node = 0; node < numnodes; node++)
mem_data[node].min_pfn = ~0UL;
efi_memmap_walk(filter_rsvd_memory, count_node_pages);
for (node = 0; node < numnodes; node++) {
memset(zones_size, 0, sizeof(zones_size));
memset(zholes_size, 0, sizeof(zholes_size));
num_physpages += mem_data[node].num_physpages;
if (mem_data[node].min_pfn >= max_dma) {
/* All of this node's memory is above ZONE_DMA */
zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
mem_data[node].min_pfn;
zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn -
mem_data[node].min_pfn -
mem_data[node].num_physpages;
} else if (mem_data[node].max_pfn < max_dma) {
/* All of this node's memory is in ZONE_DMA */
zones_size[ZONE_DMA] = mem_data[node].max_pfn -
mem_data[node].min_pfn;
zholes_size[ZONE_DMA] = mem_data[node].max_pfn -
mem_data[node].min_pfn -
mem_data[node].num_dma_physpages;
} else {
/* This node has memory in both zones */
zones_size[ZONE_DMA] = max_dma -
mem_data[node].min_pfn;
zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
mem_data[node].num_dma_physpages;
zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
max_dma;
zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] -
(mem_data[node].num_physpages -
mem_data[node].num_dma_physpages);
}
if (node == 0) {
vmalloc_end -=
PAGE_ALIGN(max_low_pfn * sizeof(struct page));
vmem_map = (struct page *) vmalloc_end;
efi_memmap_walk(create_mem_map_page_table, 0);
printk("Virtual mem_map starts at 0x%p\n", vmem_map);
}
pfn_offset = mem_data[node].min_pfn;
free_area_init_node(node, NODE_DATA(node),
vmem_map + pfn_offset, zones_size,
pfn_offset, zholes_size);
}
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
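The zones_size/zholes_size bookkeeping above has three cases per node, and in each one the hole count is simply the pfn span minus the pages actually present. A worked example with made-up numbers for the straddling case; the 4GB DMA limit matches MAX_DMA_ADDRESS as set in init.c below, while the 16KB page size is an assumption:

#include <stdio.h>

int main(void)
{
	unsigned long max_dma = (1UL << 32) >> 14;	/* 4GB / 16KB = 262144 */

	/* Hypothetical node: spans pfns 100000..500000, but only 300000 of
	 * those page frames actually exist, 120000 of them below max_dma. */
	unsigned long min_pfn = 100000, max_pfn = 500000;
	unsigned long num_physpages = 300000, num_dma_physpages = 120000;

	unsigned long dma_size     = max_dma - min_pfn;
	unsigned long dma_holes    = dma_size - num_dma_physpages;
	unsigned long normal_size  = max_pfn - max_dma;
	unsigned long normal_holes = normal_size -
				     (num_physpages - num_dma_physpages);

	printf("ZONE_DMA:    size %lu, holes %lu\n", dma_size, dma_holes);
	printf("ZONE_NORMAL: size %lu, holes %lu\n", normal_size, normal_holes);
	return 0;
}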
...@@ -20,13 +20,46 @@ ...@@ -20,13 +20,46 @@
#define TASK_HPAGE_BASE (REGION_HPAGE << REGION_SHIFT) #define TASK_HPAGE_BASE (REGION_HPAGE << REGION_SHIFT)
static long htlbpagemem; static long htlbpagemem;
int htlbpage_max; int htlbpage_max;
static long htlbzone_pages; static long htlbzone_pages;
static LIST_HEAD(htlbpage_freelist); static struct list_head hugepage_freelists[MAX_NUMNODES];
static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED; static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
static void enqueue_huge_page(struct page *page)
{
list_add(&page->list,
&hugepage_freelists[page_zone(page)->zone_pgdat->node_id]);
}
static struct page *dequeue_huge_page(void)
{
int nid = numa_node_id();
struct page *page = NULL;
if (list_empty(&hugepage_freelists[nid])) {
for (nid = 0; nid < MAX_NUMNODES; ++nid)
if (!list_empty(&hugepage_freelists[nid]))
break;
}
if (nid >= 0 && nid < MAX_NUMNODES &&
!list_empty(&hugepage_freelists[nid])) {
page = list_entry(hugepage_freelists[nid].next, struct page, list);
list_del(&page->list);
}
return page;
}
static struct page *alloc_fresh_huge_page(void)
{
static int nid = 0;
struct page *page;
page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
nid = (nid + 1) % numnodes;
return page;
}
void free_huge_page(struct page *page); void free_huge_page(struct page *page);
static struct page *alloc_hugetlb_page(void) static struct page *alloc_hugetlb_page(void)
...@@ -35,13 +68,11 @@ static struct page *alloc_hugetlb_page(void) ...@@ -35,13 +68,11 @@ static struct page *alloc_hugetlb_page(void)
struct page *page; struct page *page;
spin_lock(&htlbpage_lock); spin_lock(&htlbpage_lock);
if (list_empty(&htlbpage_freelist)) { page = dequeue_huge_page();
if (!page) {
spin_unlock(&htlbpage_lock); spin_unlock(&htlbpage_lock);
return NULL; return NULL;
} }
page = list_entry(htlbpage_freelist.next, struct page, list);
list_del(&page->list);
htlbpagemem--; htlbpagemem--;
spin_unlock(&htlbpage_lock); spin_unlock(&htlbpage_lock);
set_page_count(page, 1); set_page_count(page, 1);
...@@ -228,7 +259,7 @@ void free_huge_page(struct page *page) ...@@ -228,7 +259,7 @@ void free_huge_page(struct page *page)
INIT_LIST_HEAD(&page->list); INIT_LIST_HEAD(&page->list);
spin_lock(&htlbpage_lock); spin_lock(&htlbpage_lock);
list_add(&page->list, &htlbpage_freelist); enqueue_huge_page(page);
htlbpagemem++; htlbpagemem++;
spin_unlock(&htlbpage_lock); spin_unlock(&htlbpage_lock);
} }
...@@ -371,7 +402,7 @@ int try_to_free_low(int count) ...@@ -371,7 +402,7 @@ int try_to_free_low(int count)
map = NULL; map = NULL;
spin_lock(&htlbpage_lock); spin_lock(&htlbpage_lock);
list_for_each(p, &htlbpage_freelist) { list_for_each(p, &hugepage_freelists[0]) {
if (map) { if (map) {
list_del(&map->list); list_del(&map->list);
update_and_free_page(map); update_and_free_page(map);
...@@ -408,11 +439,11 @@ int set_hugetlb_mem_size(int count) ...@@ -408,11 +439,11 @@ int set_hugetlb_mem_size(int count)
return (int)htlbzone_pages; return (int)htlbzone_pages;
if (lcount > 0) { /* Increase the mem size. */ if (lcount > 0) { /* Increase the mem size. */
while (lcount--) { while (lcount--) {
page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER); page = alloc_fresh_huge_page();
if (page == NULL) if (page == NULL)
break; break;
spin_lock(&htlbpage_lock); spin_lock(&htlbpage_lock);
list_add(&page->list, &htlbpage_freelist); enqueue_huge_page(page);
htlbpagemem++; htlbpagemem++;
htlbzone_pages++; htlbzone_pages++;
spin_unlock(&htlbpage_lock); spin_unlock(&htlbpage_lock);
...@@ -449,17 +480,18 @@ __setup("hugepages=", hugetlb_setup); ...@@ -449,17 +480,18 @@ __setup("hugepages=", hugetlb_setup);
static int __init hugetlb_init(void) static int __init hugetlb_init(void)
{ {
int i, j; int i;
struct page *page; struct page *page;
for (i = 0; i < MAX_NUMNODES; ++i)
INIT_LIST_HEAD(&hugepage_freelists[i]);
for (i = 0; i < htlbpage_max; ++i) { for (i = 0; i < htlbpage_max; ++i) {
page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER); page = alloc_fresh_huge_page();
if (!page) if (!page)
break; break;
for (j = 0; j < HPAGE_SIZE/PAGE_SIZE; ++j)
SetPageReserved(&page[j]);
spin_lock(&htlbpage_lock); spin_lock(&htlbpage_lock);
list_add(&page->list, &htlbpage_freelist); enqueue_huge_page(page);
spin_unlock(&htlbpage_lock); spin_unlock(&htlbpage_lock);
} }
htlbpage_max = htlbpagemem = htlbzone_pages = i; htlbpage_max = htlbpagemem = htlbzone_pages = i;
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <asm/ia32.h> #include <asm/ia32.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/machvec.h> #include <asm/machvec.h>
#include <asm/numa.h>
#include <asm/patch.h> #include <asm/patch.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/sal.h> #include <asm/sal.h>
...@@ -40,10 +41,8 @@ extern void ia64_tlb_init (void); ...@@ -40,10 +41,8 @@ extern void ia64_tlb_init (void);
unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
#ifdef CONFIG_VIRTUAL_MEM_MAP #ifdef CONFIG_VIRTUAL_MEM_MAP
# define LARGE_GAP 0x40000000 /* Use virtual mem map if hole is > than this */
unsigned long vmalloc_end = VMALLOC_END_INIT; unsigned long vmalloc_end = VMALLOC_END_INIT;
static struct page *vmem_map; struct page *vmem_map;
static unsigned long num_dma_physpages;
#endif #endif
static int pgt_cache_water[2] = { 25, 50 }; static int pgt_cache_water[2] = { 25, 50 };
...@@ -337,11 +336,12 @@ ia64_mmu_init (void *my_cpu_data) ...@@ -337,11 +336,12 @@ ia64_mmu_init (void *my_cpu_data)
#ifdef CONFIG_VIRTUAL_MEM_MAP #ifdef CONFIG_VIRTUAL_MEM_MAP
static int int
create_mem_map_page_table (u64 start, u64 end, void *arg) create_mem_map_page_table (u64 start, u64 end, void *arg)
{ {
unsigned long address, start_page, end_page; unsigned long address, start_page, end_page;
struct page *map_start, *map_end; struct page *map_start, *map_end;
int node;
pgd_t *pgd; pgd_t *pgd;
pmd_t *pmd; pmd_t *pmd;
pte_t *pte; pte_t *pte;
...@@ -351,19 +351,20 @@ create_mem_map_page_table (u64 start, u64 end, void *arg) ...@@ -351,19 +351,20 @@ create_mem_map_page_table (u64 start, u64 end, void *arg)
start_page = (unsigned long) map_start & PAGE_MASK; start_page = (unsigned long) map_start & PAGE_MASK;
end_page = PAGE_ALIGN((unsigned long) map_end); end_page = PAGE_ALIGN((unsigned long) map_end);
node = paddr_to_nid(__pa(start));
for (address = start_page; address < end_page; address += PAGE_SIZE) { for (address = start_page; address < end_page; address += PAGE_SIZE) {
pgd = pgd_offset_k(address); pgd = pgd_offset_k(address);
if (pgd_none(*pgd)) if (pgd_none(*pgd))
pgd_populate(&init_mm, pgd, alloc_bootmem_pages(PAGE_SIZE)); pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
pmd = pmd_offset(pgd, address); pmd = pmd_offset(pgd, address);
if (pmd_none(*pmd)) if (pmd_none(*pmd))
pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages(PAGE_SIZE)); pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
pte = pte_offset_kernel(pmd, address); pte = pte_offset_kernel(pmd, address);
if (pte_none(*pte)) if (pte_none(*pte))
set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages(PAGE_SIZE)) >> PAGE_SHIFT, set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT,
PAGE_KERNEL)); PAGE_KERNEL));
} }
return 0; return 0;
...@@ -433,17 +434,7 @@ ia64_pfn_valid (unsigned long pfn) ...@@ -433,17 +434,7 @@ ia64_pfn_valid (unsigned long pfn)
return __get_user(byte, (char *) pfn_to_page(pfn)) == 0; return __get_user(byte, (char *) pfn_to_page(pfn)) == 0;
} }
static int int
count_dma_pages (u64 start, u64 end, void *arg)
{
unsigned long *count = arg;
if (end <= MAX_DMA_ADDRESS)
*count += (end - start) >> PAGE_SHIFT;
return 0;
}
static int
find_largest_hole (u64 start, u64 end, void *arg) find_largest_hole (u64 start, u64 end, void *arg)
{ {
u64 *max_gap = arg; u64 *max_gap = arg;
...@@ -459,103 +450,6 @@ find_largest_hole (u64 start, u64 end, void *arg) ...@@ -459,103 +450,6 @@ find_largest_hole (u64 start, u64 end, void *arg)
} }
#endif /* CONFIG_VIRTUAL_MEM_MAP */ #endif /* CONFIG_VIRTUAL_MEM_MAP */
static int
count_pages (u64 start, u64 end, void *arg)
{
unsigned long *count = arg;
*count += (end - start) >> PAGE_SHIFT;
return 0;
}
/*
* Set up the page tables.
*/
#ifdef CONFIG_DISCONTIGMEM
void
paging_init (void)
{
extern void discontig_paging_init(void);
discontig_paging_init();
efi_memmap_walk(count_pages, &num_physpages);
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
#else /* !CONFIG_DISCONTIGMEM */
void
paging_init (void)
{
unsigned long max_dma;
unsigned long zones_size[MAX_NR_ZONES];
# ifdef CONFIG_VIRTUAL_MEM_MAP
unsigned long zholes_size[MAX_NR_ZONES];
unsigned long max_gap;
# endif
/* initialize mem_map[] */
memset(zones_size, 0, sizeof(zones_size));
num_physpages = 0;
efi_memmap_walk(count_pages, &num_physpages);
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
# ifdef CONFIG_VIRTUAL_MEM_MAP
memset(zholes_size, 0, sizeof(zholes_size));
num_dma_physpages = 0;
efi_memmap_walk(count_dma_pages, &num_dma_physpages);
if (max_low_pfn < max_dma) {
zones_size[ZONE_DMA] = max_low_pfn;
zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
} else {
zones_size[ZONE_DMA] = max_dma;
zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
if (num_physpages > num_dma_physpages) {
zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
zholes_size[ZONE_NORMAL] = ((max_low_pfn - max_dma)
- (num_physpages - num_dma_physpages));
}
}
max_gap = 0;
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;
free_area_init_node(0, &contig_page_data, NULL, zones_size, 0, zholes_size);
mem_map = contig_page_data.node_mem_map;
}
else {
unsigned long map_size;
/* allocate virtual_mem_map */
map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page));
vmalloc_end -= map_size;
vmem_map = (struct page *) vmalloc_end;
efi_memmap_walk(create_mem_map_page_table, 0);
free_area_init_node(0, &contig_page_data, vmem_map, zones_size, 0, zholes_size);
mem_map = contig_page_data.node_mem_map;
printk("Virtual mem_map starts at 0x%p\n", mem_map);
}
# else /* !CONFIG_VIRTUAL_MEM_MAP */
if (max_low_pfn < max_dma)
zones_size[ZONE_DMA] = max_low_pfn;
else {
zones_size[ZONE_DMA] = max_dma;
zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
}
free_area_init(zones_size);
# endif /* !CONFIG_VIRTUAL_MEM_MAP */
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
#endif /* !CONFIG_DISCONTIGMEM */
static int static int
count_reserved_pages (u64 start, u64 end, void *arg) count_reserved_pages (u64 start, u64 end, void *arg)
{ {
......
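The mm/init.c hunks export create_mem_map_page_table() and find_largest_hole() (now declared in meminit.h), allocate the mem_map page tables from node-local bootmem via paddr_to_nid(), and move the contiguous paging_init() out of this file. The virtual mem_map itself still depends on ia64_pfn_valid(): since vmem_map is only backed by real pages where memory exists, validity is checked by probing the corresponding struct page through the exception tables. The probe line is visible in the hunk above; reconstructed in full it looks roughly like this (the local variable declaration is filled in here, not taken from the patch):

int
ia64_pfn_valid (unsigned long pfn)
{
	char byte;

	/* A faulting access makes __get_user() return -EFAULT instead of
	 * oopsing, so unbacked parts of vmem_map simply read as invalid. */
	return __get_user(byte, (char *) pfn_to_page(pfn)) == 0;
}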
...@@ -867,6 +867,9 @@ sn_pci_init (void) ...@@ -867,6 +867,9 @@ sn_pci_init (void)
int i = 0; int i = 0;
struct pci_controller *controller; struct pci_controller *controller;
if (!ia64_platform_is("sn2"))
return 0;
/* /*
* set pci_raw_ops, etc. * set pci_raw_ops, etc.
*/ */
......
...@@ -285,7 +285,6 @@ static cpuid_t intr_cpu_choose_node(void) ...@@ -285,7 +285,6 @@ static cpuid_t intr_cpu_choose_node(void)
cpuid_t intr_heuristic(vertex_hdl_t dev, int req_bit, int *resp_bit) cpuid_t intr_heuristic(vertex_hdl_t dev, int req_bit, int *resp_bit)
{ {
cpuid_t cpuid; cpuid_t cpuid;
cpuid_t candidate = CPU_NONE;
vertex_hdl_t pconn_vhdl; vertex_hdl_t pconn_vhdl;
pcibr_soft_t pcibr_soft; pcibr_soft_t pcibr_soft;
int bit; int bit;
...@@ -293,30 +292,32 @@ cpuid_t intr_heuristic(vertex_hdl_t dev, int req_bit, int *resp_bit) ...@@ -293,30 +292,32 @@ cpuid_t intr_heuristic(vertex_hdl_t dev, int req_bit, int *resp_bit)
/* XXX: gross layering violation.. */ /* XXX: gross layering violation.. */
if (hwgraph_edge_get(dev, EDGE_LBL_PCI, &pconn_vhdl) == GRAPH_SUCCESS) { if (hwgraph_edge_get(dev, EDGE_LBL_PCI, &pconn_vhdl) == GRAPH_SUCCESS) {
pcibr_soft = pcibr_soft_get(pconn_vhdl); pcibr_soft = pcibr_soft_get(pconn_vhdl);
if (pcibr_soft && pcibr_soft->bsi_err_intr) if (pcibr_soft && pcibr_soft->bsi_err_intr) {
candidate = ((hub_intr_t)pcibr_soft->bsi_err_intr)->i_cpuid; /*
} * The cpu was chosen already when we assigned
* the error interrupt.
if (candidate != CPU_NONE) { */
/* cpuid = ((hub_intr_t)pcibr_soft->bsi_err_intr)->i_cpuid;
* The cpu was chosen already when we assigned goto done;
* the error interrupt.
*/
bit = intr_reserve_level(candidate, req_bit);
if (bit >= 0) {
*resp_bit = bit;
return candidate;
} }
printk("Cannot target interrupt to target node (%ld).\n",candidate);
return CPU_NONE;
} }
/* /*
* Need to choose one. Try the controlling c-brick first. * Need to choose one. Try the controlling c-brick first.
*/ */
cpuid = intr_cpu_choose_from_node(master_node_get(dev)); cpuid = intr_cpu_choose_from_node(master_node_get(dev));
if (cpuid != CPU_NONE) if (cpuid == CPU_NONE)
return cpuid; cpuid = intr_cpu_choose_node();
return intr_cpu_choose_node();
done:
if (cpuid != CPU_NONE) {
bit = intr_reserve_level(cpuid, req_bit);
if (bit >= 0) {
*resp_bit = bit;
return cpuid;
}
}
printk("Cannot target interrupt to target cpu (%ld).\n", cpuid);
return CPU_NONE;
} }
...@@ -147,7 +147,6 @@ char drive_info[4*16]; ...@@ -147,7 +147,6 @@ char drive_info[4*16];
* Sets up an initial console to aid debugging. Intended primarily * Sets up an initial console to aid debugging. Intended primarily
* for bringup. See start_kernel() in init/main.c. * for bringup. See start_kernel() in init/main.c.
*/ */
#if defined(CONFIG_IA64_EARLY_PRINTK_SGI_SN) || defined(CONFIG_IA64_SGI_SN_SIM)
void __init void __init
early_sn_setup(void) early_sn_setup(void)
...@@ -189,7 +188,6 @@ early_sn_setup(void) ...@@ -189,7 +188,6 @@ early_sn_setup(void)
printk(KERN_DEBUG "early_sn_setup: setting master_node_bedrock_address to 0x%lx\n", master_node_bedrock_address); printk(KERN_DEBUG "early_sn_setup: setting master_node_bedrock_address to 0x%lx\n", master_node_bedrock_address);
} }
} }
#endif /* CONFIG_IA64_EARLY_PRINTK_SGI_SN */
#ifdef CONFIG_IA64_MCA #ifdef CONFIG_IA64_MCA
extern int platform_intr_list[]; extern int platform_intr_list[];
......
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
* for more details. * for more details.
*/ */
#include <linux/config.h>
/* /*
* Entries defined so far: * Entries defined so far:
* - boot param structure itself * - boot param structure itself
...@@ -32,10 +34,27 @@ extern void reserve_memory (void); ...@@ -32,10 +34,27 @@ extern void reserve_memory (void);
extern void find_initrd (void); extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg); extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
/*
* For rounding an address to the next IA64_GRANULE_SIZE or order
*/
#define GRANULEROUNDDOWN(n) ((n) & ~(IA64_GRANULE_SIZE-1))
#define GRANULEROUNDUP(n) (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1))
#define ORDERROUNDDOWN(n) ((n) & ~((PAGE_SIZE<<MAX_ORDER)-1))
#ifdef CONFIG_DISCONTIGMEM #ifdef CONFIG_DISCONTIGMEM
extern void call_pernode_memory (unsigned long start, unsigned long end, void *arg); extern void call_pernode_memory (unsigned long start, unsigned long len, void *func);
#else
# define call_pernode_memory(start, len, func) (*func)(start, len, 0)
#endif #endif
#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */ #define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
#ifdef CONFIG_VIRTUAL_MEM_MAP
# define LARGE_GAP 0x40000000 /* Use virtual mem map if hole is > than this */
extern unsigned long vmalloc_end;
extern struct page *vmem_map;
extern int find_largest_hole (u64 start, u64 end, void *arg);
extern int create_mem_map_page_table (u64 start, u64 end, void *arg);
#endif
#endif /* meminit_h */ #endif /* meminit_h */
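The new GRANULEROUNDDOWN/GRANULEROUNDUP/ORDERROUNDDOWN helpers clip addresses to IA-64 granule (or max-order) boundaries before memory ranges are handed to the per-node code. A quick worked example, assuming the default 16 MB kernel granule (IA64_GRANULE_SIZE == 0x1000000); the caller and callback names are illustrative, not from the patch:

/* assuming IA64_GRANULE_SIZE == 0x1000000 (16 MB) */
unsigned long lo = GRANULEROUNDDOWN(0x4123456UL);	/* -> 0x4000000 */
unsigned long hi = GRANULEROUNDUP(0x4123456UL);		/* -> 0x5000000 */

/* hypothetical use: clip a range, then walk it per node; under
 * !CONFIG_DISCONTIGMEM the fallback macro just calls func(start, len, 0) */
call_pernode_memory(lo, hi - lo, my_pernode_func);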
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive * License. See the file "COPYING" in the main directory of this archive
* for more details. * for more details.
* *
* Copyright (c) 2000 Silicon Graphics, Inc. All rights reserved. * Copyright (c) 2000,2003 Silicon Graphics, Inc. All rights reserved.
* Copyright (c) 2002 NEC Corp. * Copyright (c) 2002 NEC Corp.
* Copyright (c) 2002 Erich Focht <efocht@ess.nec.de> * Copyright (c) 2002 Erich Focht <efocht@ess.nec.de>
* Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com> * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
...@@ -12,148 +12,26 @@ ...@@ -12,148 +12,26 @@
#define _ASM_IA64_MMZONE_H #define _ASM_IA64_MMZONE_H
#include <linux/config.h> #include <linux/config.h>
#include <linux/init.h> #include <asm/page.h>
#include <asm/meminit.h>
/*
* Given a kaddr, find the base mem_map address for the start of the mem_map #ifdef CONFIG_DISCONTIGMEM
* entries for the bank containing the kaddr.
*/ #ifdef CONFIG_IA64_DIG /* DIG systems are small */
#define BANK_MEM_MAP_BASE(kaddr) local_node_data->bank_mem_map_base[BANK_MEM_MAP_INDEX(kaddr)] # define MAX_PHYSNODE_ID 8
# define NR_NODES 8
/* # define NR_MEMBLKS (NR_NODES * 32)
* Given a kaddr, this macro return the relative map number #else /* sn2 is the biggest case, so we use that if !DIG */
* within the bank. # define MAX_PHYSNODE_ID 2048
*/ # define NR_NODES 256
#define BANK_MAP_NR(kaddr) (BANK_OFFSET(kaddr) >> PAGE_SHIFT) # define NR_MEMBLKS (NR_NODES)
/*
* Given a pte, this macro returns a pointer to the page struct for the pte.
*/
#define pte_page(pte) virt_to_page(PAGE_OFFSET | (pte_val(pte)&_PFN_MASK))
/*
* Determine if a kaddr is a valid memory address of memory that
* actually exists.
*
* The check consists of 2 parts:
* - verify that the address is a region 7 address & does not
* contain any bits that preclude it from being a valid platform
* memory address
* - verify that the chunk actually exists.
*
* Note that IO addresses are NOT considered valid addresses.
*
* Note, many platforms can simply check if kaddr exceeds a specific size.
* (However, this won't work on SGI platforms since IO space is embedded
* within the range of valid memory addresses & nodes have holes in the
* address range between banks).
*/
#define kern_addr_valid(kaddr) ({long _kav=(long)(kaddr); \
VALID_MEM_KADDR(_kav);})
/*
* Given a kaddr, return a pointer to the page struct for the page.
* If the kaddr does not represent RAM memory that potentially exists, return
* a pointer the page struct for max_mapnr. IO addresses will
* return the page for max_nr. Addresses in unpopulated RAM banks may
* return undefined results OR may panic the system.
*
*/
#define virt_to_page(kaddr) ({long _kvtp=(long)(kaddr); \
(VALID_MEM_KADDR(_kvtp)) \
? BANK_MEM_MAP_BASE(_kvtp) + BANK_MAP_NR(_kvtp) \
: NULL;})
/*
* Given a page struct entry, return the physical address that the page struct represents.
* Since IA64 has all memory in the DMA zone, the following works:
*/
#define page_to_phys(page) __pa(page_address(page))
#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map)
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)
#define pfn_to_page(pfn) (struct page *)(node_mem_map(pfn_to_nid(pfn)) + node_localnr(pfn, pfn_to_nid(pfn)))
#define pfn_to_nid(pfn) local_node_data->node_id_map[(pfn << PAGE_SHIFT) >> BANKSHIFT]
#define page_to_pfn(page) (long)((page - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn)
/*
* pfn_valid should be made as fast as possible, and the current definition
* is valid for machines that are NUMA, but still contiguous, which is what
* is currently supported. A more generalised, but slower definition would
* be something like this - mbligh:
* ( pfn_to_pgdat(pfn) && (pfn < node_end_pfn(pfn_to_nid(pfn))) )
*/
#define pfn_valid(pfn) (pfn < max_low_pfn)
extern unsigned long max_low_pfn;
#if defined(CONFIG_IA64_DIG)
/*
* Platform definitions for DIG platform with contiguous memory.
*/
#define MAX_PHYSNODE_ID 8 /* Maximum node number +1 */
#define MAX_PHYS_MEMORY (1UL << 40) /* 1 TB */
/*
* Bank definitions.
* Configurable settings for DIG: 512MB/bank: 16GB/node,
* 2048MB/bank: 64GB/node,
* 8192MB/bank: 256GB/node.
*/
#define NR_BANKS_PER_NODE 32
#if defined(CONFIG_IA64_NODESIZE_16GB)
# define BANKSHIFT 29
#elif defined(CONFIG_IA64_NODESIZE_64GB)
# define BANKSHIFT 31
#elif defined(CONFIG_IA64_NODESIZE_256GB)
# define BANKSHIFT 33
#else
# error Unsupported bank and nodesize!
#endif #endif
#define BANKSIZE (1UL << BANKSHIFT)
#elif defined(CONFIG_IA64_SGI_SN2) extern unsigned long max_low_pfn;
/*
* SGI SN2 discontig definitions
*/
#define MAX_PHYSNODE_ID 2048 /* 2048 node ids (also called nasid) */
#define MAX_PHYS_MEMORY (1UL << 49)
#define NR_BANKS_PER_NODE 4
#define BANKSHIFT 38
#define SN2_NODE_SIZE (64UL*1024*1024*1024) /* 64GB per node */
#define BANKSIZE (SN2_NODE_SIZE/NR_BANKS_PER_NODE)
#endif /* CONFIG_IA64_DIG */
#if defined(CONFIG_IA64_DIG) || defined (CONFIG_IA64_SGI_SN2)
/* Common defines for both platforms */
#include <asm/numnodes.h>
#define BANK_OFFSET(addr) ((unsigned long)(addr) & (BANKSIZE-1))
#define NR_BANKS (NR_BANKS_PER_NODE * (1 << NODES_SHIFT))
#define NR_MEMBLKS (NR_BANKS)
/*
* VALID_MEM_KADDR returns a boolean to indicate if a kaddr is
* potentially a valid cacheable identity mapped RAM memory address.
* Note that the RAM may or may not actually be present!!
*/
#define VALID_MEM_KADDR(kaddr) 1
/*
* Given a nodeid & a bank number, find the address of the mem_map
* entry for the first page of the bank.
*/
#define BANK_MEM_MAP_INDEX(kaddr) \
(((unsigned long)(kaddr) & (MAX_PHYS_MEMORY-1)) >> BANKSHIFT)
#endif /* CONFIG_IA64_DIG || CONFIG_IA64_SGI_SN2 */ #define pfn_valid(pfn) (((pfn) < max_low_pfn) && ia64_pfn_valid(pfn))
#define page_to_pfn(page) ((unsigned long) (page - vmem_map))
#define pfn_to_page(pfn) (vmem_map + (pfn))
#endif /* CONFIG_DISCONTIGMEM */
#endif /* _ASM_IA64_MMZONE_H */ #endif /* _ASM_IA64_MMZONE_H */
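With the mmzone.h rewrite the per-bank tables disappear entirely: under DISCONTIGMEM, pfn_to_page()/page_to_pfn() become plain offsets into the single virtual mem_map, and pfn_valid() adds the ia64_pfn_valid() probe to cope with holes. A small illustration using only the macros defined above:

/* illustration: pfn <-> struct page is pointer arithmetic on vmem_map */
static int pfn_roundtrip_ok(unsigned long pfn)
{
	struct page *pg = pfn_to_page(pfn);		/* vmem_map + pfn */

	/* pfn_valid() tolerates holes: only populated parts of vmem_map are mapped */
	return pfn_valid(pfn) && page_to_pfn(pg) == pfn;	/* pg - vmem_map */
}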
...@@ -11,9 +11,14 @@ ...@@ -11,9 +11,14 @@
#ifndef _ASM_IA64_NODEDATA_H #ifndef _ASM_IA64_NODEDATA_H
#define _ASM_IA64_NODEDATA_H #define _ASM_IA64_NODEDATA_H
#include <linux/config.h>
#include <linux/numa.h> #include <linux/numa.h>
#include <asm/percpu.h>
#include <asm/mmzone.h> #include <asm/mmzone.h>
#ifdef CONFIG_DISCONTIGMEM
/* /*
* Node Data. One of these structures is located on each node of a NUMA system. * Node Data. One of these structures is located on each node of a NUMA system.
*/ */
...@@ -22,10 +27,7 @@ struct pglist_data; ...@@ -22,10 +27,7 @@ struct pglist_data;
struct ia64_node_data { struct ia64_node_data {
short active_cpu_count; short active_cpu_count;
short node; short node;
struct pglist_data *pg_data_ptrs[MAX_NUMNODES]; struct pglist_data *pg_data_ptrs[NR_NODES];
struct page *bank_mem_map_base[NR_BANKS];
struct ia64_node_data *node_data_ptrs[MAX_NUMNODES];
short node_id_map[NR_BANKS];
}; };
...@@ -34,41 +36,17 @@ struct ia64_node_data { ...@@ -34,41 +36,17 @@ struct ia64_node_data {
*/ */
#define local_node_data (local_cpu_data->node_data) #define local_node_data (local_cpu_data->node_data)
/*
* Return a pointer to the node_data structure for the specified node.
*/
#define node_data(node) (local_node_data->node_data_ptrs[node])
/*
* Get a pointer to the node_id/node_data for the current cpu.
* (boot time only)
*/
extern int boot_get_local_nodeid(void);
extern struct ia64_node_data *get_node_data_ptr(void);
/* /*
* Given a node id, return a pointer to the pg_data_t for the node. * Given a node id, return a pointer to the pg_data_t for the node.
* The following 2 macros are similar.
* *
* NODE_DATA - should be used in all code not related to system * NODE_DATA - should be used in all code not related to system
* initialization. It uses pernode data structures to minimize * initialization. It uses pernode data structures to minimize
* offnode memory references. However, these structure are not * offnode memory references. However, these structure are not
* present during boot. This macro can be used once cpu_init * present during boot. This macro can be used once cpu_init
* completes. * completes.
*
* BOOT_NODE_DATA
* - should be used during system initialization
* prior to freeing __initdata. It does not depend on the percpu
* area being present.
*
* NOTE: The names of these macros are misleading but are difficult to change
* since they are used in generic linux & on other architecures.
*/ */
#define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid]) #define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid])
#define BOOT_NODE_DATA(nid) boot_get_pg_data_ptr((long)(nid))
struct pglist_data; #endif /* CONFIG_DISCONTIGMEM */
extern struct pglist_data * __init boot_get_pg_data_ptr(long);
#endif /* _ASM_IA64_NODEDATA_H */ #endif /* _ASM_IA64_NODEDATA_H */
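nodedata.h now keeps only the replicated pg_data_ptrs[NR_NODES] table, so NODE_DATA(nid) resolves through the local node's copy without an off-node reference, and the BOOT_NODE_DATA variant is dropped. A minimal hedged example of post-boot use; the pg_data_t field shown is from the generic definition, not this patch:

/* usable once cpu_init() has set local_node_data; the lookup stays node-local
 * because pg_data_ptrs[] is replicated on every node */
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long first_pfn = pgdat->node_start_pfn;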
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* for more details. * for more details.
* *
* This file contains NUMA specific prototypes and definitions. * This file contains NUMA specific prototypes and definitions.
* *
* 2002/08/05 Erich Focht <efocht@ess.nec.de> * 2002/08/05 Erich Focht <efocht@ess.nec.de>
* *
*/ */
...@@ -12,12 +12,17 @@ ...@@ -12,12 +12,17 @@
#define _ASM_IA64_NUMA_H #define _ASM_IA64_NUMA_H
#include <linux/config.h> #include <linux/config.h>
#include <linux/cpumask.h>
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
#include <linux/numa.h>
#include <linux/cache.h> #include <linux/cache.h>
#include <linux/cache.h>
#include <linux/cpumask.h>
#include <linux/numa.h>
#include <linux/smp.h>
#include <linux/threads.h>
#include <asm/mmzone.h>
extern volatile char cpu_to_node_map[NR_CPUS] __cacheline_aligned; extern volatile char cpu_to_node_map[NR_CPUS] __cacheline_aligned;
extern volatile cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; extern volatile cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
...@@ -61,6 +66,10 @@ extern int paddr_to_nid(unsigned long paddr); ...@@ -61,6 +66,10 @@ extern int paddr_to_nid(unsigned long paddr);
#define local_nodeid (cpu_to_node_map[smp_processor_id()]) #define local_nodeid (cpu_to_node_map[smp_processor_id()])
#else /* !CONFIG_NUMA */
#define paddr_to_nid(addr) 0
#endif /* CONFIG_NUMA */ #endif /* CONFIG_NUMA */
#endif /* _ASM_IA64_NUMA_H */ #endif /* _ASM_IA64_NUMA_H */
...@@ -94,18 +94,20 @@ do { \ ...@@ -94,18 +94,20 @@ do { \
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#ifdef CONFIG_VIRTUAL_MEM_MAP
extern int ia64_pfn_valid (unsigned long pfn);
#else
# define ia64_pfn_valid(pfn) 1
#endif
#ifndef CONFIG_DISCONTIGMEM #ifndef CONFIG_DISCONTIGMEM
# ifdef CONFIG_VIRTUAL_MEM_MAP #define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
extern int ia64_pfn_valid (unsigned long pfn);
# define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
# else
# define pfn_valid(pfn) ((pfn) < max_mapnr)
# endif
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define page_to_pfn(page) ((unsigned long) (page - mem_map)) #define page_to_pfn(page) ((unsigned long) (page - mem_map))
#define pfn_to_page(pfn) (mem_map + (pfn)) #define pfn_to_page(pfn) (mem_map + (pfn))
#endif /* CONFIG_DISCONTIGMEM */
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
#endif #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
typedef union ia64_va { typedef union ia64_va {
struct { struct {
......
...@@ -46,11 +46,13 @@ DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); ...@@ -46,11 +46,13 @@ DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size); extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size);
extern void setup_per_cpu_areas (void); extern void setup_per_cpu_areas (void);
extern void *per_cpu_init(void);
#else /* ! SMP */ #else /* ! SMP */
#define per_cpu(var, cpu) ((void)cpu, per_cpu__##var) #define per_cpu(var, cpu) ((void)cpu, per_cpu__##var)
#define __get_cpu_var(var) per_cpu__##var #define __get_cpu_var(var) per_cpu__##var
#define per_cpu_init() (__phys_per_cpu_start)
#endif /* SMP */ #endif /* SMP */
......
...@@ -174,7 +174,6 @@ ia64_phys_addr_valid (unsigned long addr) ...@@ -174,7 +174,6 @@ ia64_phys_addr_valid (unsigned long addr)
return (addr & (local_cpu_data->unimpl_pa_mask)) == 0; return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
} }
#ifndef CONFIG_DISCONTIGMEM
/* /*
* kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
* memory. For the return value to be meaningful, ADDR must be >= * memory. For the return value to be meaningful, ADDR must be >=
...@@ -190,7 +189,6 @@ ia64_phys_addr_valid (unsigned long addr) ...@@ -190,7 +189,6 @@ ia64_phys_addr_valid (unsigned long addr)
*/ */
#define kern_addr_valid(addr) (1) #define kern_addr_valid(addr) (1)
#endif
/* /*
* Now come the defines and routines to manage and access the three-level * Now come the defines and routines to manage and access the three-level
...@@ -240,10 +238,8 @@ ia64_phys_addr_valid (unsigned long addr) ...@@ -240,10 +238,8 @@ ia64_phys_addr_valid (unsigned long addr)
#define pte_none(pte) (!pte_val(pte)) #define pte_none(pte) (!pte_val(pte))
#define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE)) #define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE))
#define pte_clear(pte) (pte_val(*(pte)) = 0UL) #define pte_clear(pte) (pte_val(*(pte)) = 0UL)
#ifndef CONFIG_DISCONTIGMEM
/* pte_page() returns the "struct page *" corresponding to the PTE: */ /* pte_page() returns the "struct page *" corresponding to the PTE: */
#define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET)) #define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET))
#endif
#define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd))) #define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd)))
......
...@@ -128,7 +128,7 @@ typedef struct irqpda_s irqpda_t; ...@@ -128,7 +128,7 @@ typedef struct irqpda_s irqpda_t;
* Check if given a compact node id the corresponding node has all the * Check if given a compact node id the corresponding node has all the
* cpus disabled. * cpus disabled.
*/ */
#define is_headless_node(cnode) (!any_online_cpu(node_to_cpumask(cnode))) #define is_headless_node(cnode) (!node_to_cpu_mask[cnode])
/* /*
* Check if given a node vertex handle the corresponding node has all the * Check if given a node vertex handle the corresponding node has all the
......
...@@ -408,11 +408,7 @@ struct exception_table_entry { ...@@ -408,11 +408,7 @@ struct exception_table_entry {
extern void handle_exception (struct pt_regs *regs, const struct exception_table_entry *e); extern void handle_exception (struct pt_regs *regs, const struct exception_table_entry *e);
extern const struct exception_table_entry *search_exception_tables (unsigned long addr); extern const struct exception_table_entry *search_exception_tables (unsigned long addr);
#ifdef GAS_HAS_LOCAL_TAGS
# define SEARCH_EXCEPTION_TABLE(regs) search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri) # define SEARCH_EXCEPTION_TABLE(regs) search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri)
#else
# define SEARCH_EXCEPTION_TABLE(regs) search_exception_tables(regs->cr_iip)
#endif
static inline int static inline int
done_with_exception (struct pt_regs *regs) done_with_exception (struct pt_regs *regs)
......
...@@ -237,17 +237,17 @@ ...@@ -237,17 +237,17 @@
#define __NR_epoll_wait 1245 #define __NR_epoll_wait 1245
#define __NR_restart_syscall 1246 #define __NR_restart_syscall 1246
#define __NR_semtimedop 1247 #define __NR_semtimedop 1247
#define __NR_sys_timer_create 1248 #define __NR_timer_create 1248
#define __NR_sys_timer_settime 1249 #define __NR_timer_settime 1249
#define __NR_sys_timer_gettime 1250 #define __NR_timer_gettime 1250
#define __NR_sys_timer_getoverrun 1251 #define __NR_timer_getoverrun 1251
#define __NR_sys_timer_delete 1252 #define __NR_timer_delete 1252
#define __NR_sys_clock_settime 1253 #define __NR_clock_settime 1253
#define __NR_sys_clock_gettime 1254 #define __NR_clock_gettime 1254
#define __NR_sys_clock_getres 1255 #define __NR_clock_getres 1255
#define __NR_sys_clock_nanosleep 1256 #define __NR_clock_nanosleep 1256
#define __NR_sys_fstatfs64 1257 #define __NR_fstatfs64 1257
#define __NR_sys_statfs64 1258 #define __NR_statfs64 1258
#ifdef __KERNEL__ #ifdef __KERNEL__
......