Commit a9532b64 authored by David Mosberger's avatar David Mosberger

Merge wailua.hpl.hp.com:/bk/vanilla/linux-2.5

into wailua.hpl.hp.com:/bk/lia64/to-linus-2.5
parents 7fb7f2ac 349714fb
...@@ -557,3 +557,8 @@ CONFIG_DEBUG_SPINLOCK ...@@ -557,3 +557,8 @@ CONFIG_DEBUG_SPINLOCK
best used in conjunction with the NMI watchdog so that spinlock best used in conjunction with the NMI watchdog so that spinlock
deadlocks are also debuggable. deadlocks are also debuggable.
CONFIG_IA64_GRANULE_16MB
IA64 identity-mapped regions use a large page size called "granules".
Select "16MB" for a small granule size.
Select "64MB" for a large granule size. This is the current default.
...@@ -83,7 +83,7 @@ fi ...@@ -83,7 +83,7 @@ fi
define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore. define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore.
bool 'SMP support' CONFIG_SMP bool 'SMP support' CONFIG_SMP
tristate 'Support running of Linux/x86 binaries' CONFIG_IA32_SUPPORT bool 'Support running of Linux/x86 binaries' CONFIG_IA32_SUPPORT
bool 'Performance monitor support' CONFIG_PERFMON bool 'Performance monitor support' CONFIG_PERFMON
tristate '/proc/pal support' CONFIG_IA64_PALINFO tristate '/proc/pal support' CONFIG_IA64_PALINFO
tristate '/proc/efi/vars support' CONFIG_EFI_VARS tristate '/proc/efi/vars support' CONFIG_EFI_VARS
...@@ -123,6 +123,7 @@ source drivers/block/Config.in ...@@ -123,6 +123,7 @@ source drivers/block/Config.in
source drivers/ieee1394/Config.in source drivers/ieee1394/Config.in
source drivers/message/i2o/Config.in source drivers/message/i2o/Config.in
source drivers/md/Config.in source drivers/md/Config.in
source drivers/message/fusion/Config.in
mainmenu_option next_comment mainmenu_option next_comment
comment 'ATA/IDE/MFM/RLL support' comment 'ATA/IDE/MFM/RLL support'
......
...@@ -214,6 +214,7 @@ struct sba_device { ...@@ -214,6 +214,7 @@ struct sba_device {
static struct sba_device *sba_list; static struct sba_device *sba_list;
static int sba_count; static int sba_count;
static int reserve_sba_gart = 1; static int reserve_sba_gart = 1;
static struct pci_dev sac_only_dev;
#define sba_sg_iova(sg) (sg->address) #define sba_sg_iova(sg) (sg->address)
#define sba_sg_len(sg) (sg->length) #define sba_sg_len(sg) (sg->length)
...@@ -950,7 +951,12 @@ sba_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) ...@@ -950,7 +951,12 @@ sba_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle)
if (ret) { if (ret) {
memset(ret, 0, size); memset(ret, 0, size);
*dma_handle = sba_map_single(hwdev, ret, size, 0); /*
* REVISIT: if sba_map_single starts needing more
* than dma_mask from the device, this needs to be
* updated.
*/
*dma_handle = sba_map_single(&sac_only_dev, ret, size, 0);
} }
return ret; return ret;
...@@ -1807,1854 +1813,10 @@ void __init sba_init(void) ...@@ -1807,1854 +1813,10 @@ void __init sba_init(void)
sba_dev->sba_hpa = hpa; sba_dev->sba_hpa = hpa;
/* /*
* We need to check for an AGP device, if we find one, then only * We pass this fake device from alloc_consistent to ensure
* use part of the IOVA space for PCI DMA, the rest is for GART. * we only use SAC for alloc_consistent mappings.
* REVISIT for multiple IOC.
*/ */
pci_for_each_dev(device) sac_only_dev.dma_mask = 0xFFFFFFFFUL;
agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP);
if (agp_found && reserve_sba_gart)
SBA_SET_AGP(sba_dev);
sba_hw_init(sba_dev);
sba_common_init(sba_dev);
#ifdef CONFIG_PROC_FS
{
struct proc_dir_entry * proc_mckinley_root;
proc_mckinley_root = proc_mkdir("bus/mckinley",0);
create_proc_info_entry(sba_rev, 0, proc_mckinley_root, sba_proc_info);
create_proc_info_entry("bitmap", 0, proc_mckinley_root, sba_resource_map);
}
#endif
}
static int __init
nosbagart (char *str)
{
reserve_sba_gart = 0;
return 1;
}
__setup("nosbagart",nosbagart);
EXPORT_SYMBOL(sba_init);
EXPORT_SYMBOL(sba_map_single);
EXPORT_SYMBOL(sba_unmap_single);
EXPORT_SYMBOL(sba_map_sg);
EXPORT_SYMBOL(sba_unmap_sg);
EXPORT_SYMBOL(sba_dma_address);
EXPORT_SYMBOL(sba_alloc_consistent);
EXPORT_SYMBOL(sba_free_consistent);
/*
** IA64 System Bus Adapter (SBA) I/O MMU manager
**
** (c) Copyright 2002 Alex Williamson
** (c) Copyright 2002 Hewlett-Packard Company
**
** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
**
** This module initializes the IOC (I/O Controller) found on HP
** McKinley machines and their successors.
**
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <asm/delay.h> /* ia64_get_itc() */
#include <asm/io.h>
#include <asm/page.h> /* PAGE_OFFSET */
#include <asm/efi.h>
#define DRIVER_NAME "SBA"
#ifndef CONFIG_IA64_HP_PROTO
#define ALLOW_IOV_BYPASS
#endif
#define ENABLE_MARK_CLEAN
/*
** The number of debug flags is a clue - this code is fragile.
*/
#undef DEBUG_SBA_INIT
#undef DEBUG_SBA_RUN
#undef DEBUG_SBA_RUN_SG
#undef DEBUG_SBA_RESOURCE
#undef ASSERT_PDIR_SANITY
#undef DEBUG_LARGE_SG_ENTRIES
#undef DEBUG_BYPASS
#define SBA_INLINE __inline__
/* #define SBA_INLINE */
#ifdef DEBUG_SBA_INIT
#define DBG_INIT(x...) printk(x)
#else
#define DBG_INIT(x...)
#endif
#ifdef DEBUG_SBA_RUN
#define DBG_RUN(x...) printk(x)
#else
#define DBG_RUN(x...)
#endif
#ifdef DEBUG_SBA_RUN_SG
#define DBG_RUN_SG(x...) printk(x)
#else
#define DBG_RUN_SG(x...)
#endif
#ifdef DEBUG_SBA_RESOURCE
#define DBG_RES(x...) printk(x)
#else
#define DBG_RES(x...)
#endif
#ifdef DEBUG_BYPASS
#define DBG_BYPASS(x...) printk(x)
#else
#define DBG_BYPASS(x...)
#endif
#ifdef ASSERT_PDIR_SANITY
#define ASSERT(expr) \
if(!(expr)) { \
printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
panic(#expr); \
}
#else
#define ASSERT(expr)
#endif
#define KB(x) ((x) * 1024)
#define MB(x) (KB (KB (x)))
#define GB(x) (MB (KB (x)))
/*
** The number of pdir entries to "free" before issueing
** a read to PCOM register to flush out PCOM writes.
** Interacts with allocation granularity (ie 4 or 8 entries
** allocated and free'd/purged at a time might make this
** less interesting).
*/
#define DELAYED_RESOURCE_CNT 16
#define DEFAULT_DMA_HINT_REG 0
#define ZX1_FUNC_ID_VALUE ((PCI_DEVICE_ID_HP_ZX1_SBA << 16) | PCI_VENDOR_ID_HP)
#define ZX1_MC_ID ((PCI_DEVICE_ID_HP_ZX1_MC << 16) | PCI_VENDOR_ID_HP)
#define SBA_FUNC_ID 0x0000 /* function id */
#define SBA_FCLASS 0x0008 /* function class, bist, header, rev... */
#define SBA_FUNC_SIZE 0x10000 /* SBA configuration function reg set */
unsigned int __initdata zx1_func_offsets[] = {0x1000, 0x4000, 0x8000,
0x9000, 0xa000, -1};
#define SBA_IOC_OFFSET 0x1000
#define MAX_IOC 1 /* we only have 1 for now*/
#define IOC_IBASE 0x300 /* IO TLB */
#define IOC_IMASK 0x308
#define IOC_PCOM 0x310
#define IOC_TCNFG 0x318
#define IOC_PDIR_BASE 0x320
#define IOC_IOVA_SPACE_BASE 0x40000000 /* IOVA ranges start at 1GB */
/*
** IOC supports 4/8/16/64KB page sizes (see TCNFG register)
** It's safer (avoid memory corruption) to keep DMA page mappings
** equivalently sized to VM PAGE_SIZE.
**
** We really can't avoid generating a new mapping for each
** page since the Virtual Coherence Index has to be generated
** and updated for each page.
**
** IOVP_SIZE could only be greater than PAGE_SIZE if we are
** confident the drivers really only touch the next physical
** page iff that driver instance owns it.
*/
#define IOVP_SIZE PAGE_SIZE
#define IOVP_SHIFT PAGE_SHIFT
#define IOVP_MASK PAGE_MASK
struct ioc {
unsigned long ioc_hpa; /* I/O MMU base address */
char *res_map; /* resource map, bit == pdir entry */
u64 *pdir_base; /* physical base address */
unsigned long ibase; /* pdir IOV Space base */
unsigned long imask; /* pdir IOV Space mask */
unsigned long *res_hint; /* next avail IOVP - circular search */
spinlock_t res_lock;
unsigned long hint_mask_pdir; /* bits used for DMA hints */
unsigned int res_bitshift; /* from the RIGHT! */
unsigned int res_size; /* size of resource map in bytes */
unsigned int hint_shift_pdir;
unsigned long dma_mask;
#if DELAYED_RESOURCE_CNT > 0
int saved_cnt;
struct sba_dma_pair {
dma_addr_t iova;
size_t size;
} saved[DELAYED_RESOURCE_CNT];
#endif
#ifdef CONFIG_PROC_FS
#define SBA_SEARCH_SAMPLE 0x100
unsigned long avg_search[SBA_SEARCH_SAMPLE];
unsigned long avg_idx; /* current index into avg_search */
unsigned long used_pages;
unsigned long msingle_calls;
unsigned long msingle_pages;
unsigned long msg_calls;
unsigned long msg_pages;
unsigned long usingle_calls;
unsigned long usingle_pages;
unsigned long usg_calls;
unsigned long usg_pages;
#ifdef ALLOW_IOV_BYPASS
unsigned long msingle_bypass;
unsigned long usingle_bypass;
unsigned long msg_bypass;
#endif
#endif
/* STUFF We don't need in performance path */
unsigned int pdir_size; /* in bytes, determined by IOV Space size */
};
struct sba_device {
struct sba_device *next; /* list of SBA's in system */
const char *name;
unsigned long sba_hpa; /* base address */
spinlock_t sba_lock;
unsigned int flags; /* state/functionality enabled */
unsigned int hw_rev; /* HW revision of chip */
unsigned int num_ioc; /* number of on-board IOC's */
struct ioc ioc[MAX_IOC];
};
static struct sba_device *sba_list;
static int sba_count;
static int reserve_sba_gart = 1;
#define sba_sg_iova(sg) (sg->address)
#define sba_sg_len(sg) (sg->length)
#define sba_sg_buffer(sg) (sg->orig_address)
/* REVISIT - fix me for multiple SBAs/IOCs */
#define GET_IOC(dev) (sba_list->ioc)
#define SBA_SET_AGP(sba_dev) (sba_dev->flags |= 0x1)
#define SBA_GET_AGP(sba_dev) (sba_dev->flags & 0x1)
/*
** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
** (or rather not merge) DMA's into managable chunks.
** On parisc, this is more of the software/tuning constraint
** rather than the HW. I/O MMU allocation alogorithms can be
** faster with smaller size is (to some degree).
*/
#define DMA_CHUNK_SIZE (BITS_PER_LONG*PAGE_SIZE)
/* Looks nice and keeps the compiler happy */
#define SBA_DEV(d) ((struct sba_device *) (d))
#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1))
/************************************
** SBA register read and write support
**
** BE WARNED: register writes are posted.
** (ie follow writes which must reach HW with a read)
**
*/
#define READ_REG(addr) __raw_readq(addr)
#define WRITE_REG(val, addr) __raw_writeq(val, addr)
#ifdef DEBUG_SBA_INIT
/**
* sba_dump_tlb - debugging only - print IOMMU operating parameters
* @hpa: base address of the IOMMU
*
* Print the size/location of the IO MMU PDIR.
*/
static void
sba_dump_tlb(char *hpa)
{
DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
DBG_INIT("IOC_IBASE : %016lx\n", READ_REG(hpa+IOC_IBASE));
DBG_INIT("IOC_IMASK : %016lx\n", READ_REG(hpa+IOC_IMASK));
DBG_INIT("IOC_TCNFG : %016lx\n", READ_REG(hpa+IOC_TCNFG));
DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
DBG_INIT("\n");
}
#endif
#ifdef ASSERT_PDIR_SANITY
/**
* sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
* @ioc: IO MMU structure which owns the pdir we are interested in.
* @msg: text to print ont the output line.
* @pide: pdir index.
*
* Print one entry of the IO MMU PDIR in human readable form.
*/
static void
sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
{
/* start printing from lowest pde in rval */
u64 *ptr = &(ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)]);
unsigned long *rptr = (unsigned long *) &(ioc->res_map[(pide >>3) & ~(sizeof(unsigned long) - 1)]);
uint rcnt;
/* printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", */
printk("SBA: %s rp %p bit %d rval 0x%lx\n",
msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);
rcnt = 0;
while (rcnt < BITS_PER_LONG) {
printk("%s %2d %p %016Lx\n",
(rcnt == (pide & (BITS_PER_LONG - 1)))
? " -->" : " ",
rcnt, ptr, *ptr );
rcnt++;
ptr++;
}
printk("%s", msg);
}
/**
* sba_check_pdir - debugging only - consistency checker
* @ioc: IO MMU structure which owns the pdir we are interested in.
* @msg: text to print ont the output line.
*
* Verify the resource map and pdir state is consistent
*/
static int
sba_check_pdir(struct ioc *ioc, char *msg)
{
u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
u64 *rptr = (u64 *) ioc->res_map; /* resource map ptr */
u64 *pptr = ioc->pdir_base; /* pdir ptr */
uint pide = 0;
while (rptr < rptr_end) {
u64 rval;
int rcnt; /* number of bits we might check */
rval = *rptr;
rcnt = 64;
while (rcnt) {
/* Get last byte and highest bit from that */
u32 pde = ((u32)((*pptr >> (63)) & 0x1));
if ((rval & 0x1) ^ pde)
{
/*
** BUMMER! -- res_map != pdir --
** Dump rval and matching pdir entries
*/
sba_dump_pdir_entry(ioc, msg, pide);
return(1);
}
rcnt--;
rval >>= 1; /* try the next bit */
pptr++;
pide++;
}
rptr++; /* look at next word of res_map */
}
/* It'd be nice if we always got here :^) */
return 0;
}
/**
* sba_dump_sg - debugging only - print Scatter-Gather list
* @ioc: IO MMU structure which owns the pdir we are interested in.
* @startsg: head of the SG list
* @nents: number of entries in SG list
*
* print the SG list so we can verify it's correct by hand.
*/
static void
sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
while (nents-- > 0) {
printk(" %d : %08lx/%05x %p\n",
nents,
(unsigned long) sba_sg_iova(startsg),
sba_sg_len(startsg),
sba_sg_buffer(startsg));
startsg++;
}
}
static void
sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
struct scatterlist *the_sg = startsg;
int the_nents = nents;
while (the_nents-- > 0) {
if (sba_sg_buffer(the_sg) == 0x0UL)
sba_dump_sg(NULL, startsg, nents);
the_sg++;
}
}
#endif /* ASSERT_PDIR_SANITY */
/**************************************************************
*
* I/O Pdir Resource Management
*
* Bits set in the resource map are in use.
* Each bit can represent a number of pages.
* LSbs represent lower addresses (IOVA's).
*
***************************************************************/
#define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */
/* Convert from IOVP to IOVA and vice versa. */
#define SBA_IOVA(ioc,iovp,offset,hint_reg) ((ioc->ibase) | (iovp) | (offset) | ((hint_reg)<<(ioc->hint_shift_pdir)))
#define SBA_IOVP(ioc,iova) (((iova) & ioc->hint_mask_pdir) & ~(ioc->ibase))
/* FIXME : review these macros to verify correctness and usage */
#define PDIR_INDEX(iovp) ((iovp)>>IOVP_SHIFT)
#define RESMAP_MASK(n) ~(~0UL << (n))
#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1)
/**
* sba_search_bitmap - find free space in IO PDIR resource bitmap
* @ioc: IO MMU structure which owns the pdir we are interested in.
* @bits_wanted: number of entries we need.
*
* Find consecutive free bits in resource bitmap.
* Each bit represents one entry in the IO Pdir.
* Cool perf optimization: search for log2(size) bits at a time.
*/
static SBA_INLINE unsigned long
sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted)
{
unsigned long *res_ptr = ioc->res_hint;
unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
unsigned long pide = ~0UL;
ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
ASSERT(res_ptr < res_end);
if (bits_wanted > (BITS_PER_LONG/2)) {
/* Search word at a time - no mask needed */
for(; res_ptr < res_end; ++res_ptr) {
if (*res_ptr == 0) {
*res_ptr = RESMAP_MASK(bits_wanted);
pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
pide <<= 3; /* convert to bit address */
break;
}
}
/* point to the next word on next pass */
res_ptr++;
ioc->res_bitshift = 0;
} else {
/*
** Search the resource bit map on well-aligned values.
** "o" is the alignment.
** We need the alignment to invalidate I/O TLB using
** SBA HW features in the unmap path.
*/
unsigned long o = 1 << get_order(bits_wanted << PAGE_SHIFT);
uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
unsigned long mask;
if (bitshiftcnt >= BITS_PER_LONG) {
bitshiftcnt = 0;
res_ptr++;
}
mask = RESMAP_MASK(bits_wanted) << bitshiftcnt;
DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
while(res_ptr < res_end)
{
DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr);
ASSERT(0 != mask);
if(0 == ((*res_ptr) & mask)) {
*res_ptr |= mask; /* mark resources busy! */
pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
pide <<= 3; /* convert to bit address */
pide += bitshiftcnt;
break;
}
mask <<= o;
bitshiftcnt += o;
if (0 == mask) {
mask = RESMAP_MASK(bits_wanted);
bitshiftcnt=0;
res_ptr++;
}
}
/* look in the same word on the next pass */
ioc->res_bitshift = bitshiftcnt + bits_wanted;
}
/* wrapped ? */
if (res_end <= res_ptr) {
ioc->res_hint = (unsigned long *) ioc->res_map;
ioc->res_bitshift = 0;
} else {
ioc->res_hint = res_ptr;
}
return (pide);
}
/**
* sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap
* @ioc: IO MMU structure which owns the pdir we are interested in.
* @size: number of bytes to create a mapping for
*
* Given a size, find consecutive unmarked and then mark those bits in the
* resource bit map.
*/
static int
sba_alloc_range(struct ioc *ioc, size_t size)
{
unsigned int pages_needed = size >> IOVP_SHIFT;
#ifdef CONFIG_PROC_FS
unsigned long itc_start = ia64_get_itc();
#endif
unsigned long pide;
ASSERT(pages_needed);
ASSERT((pages_needed * IOVP_SIZE) <= DMA_CHUNK_SIZE);
ASSERT(pages_needed <= BITS_PER_LONG);
ASSERT(0 == (size & ~IOVP_MASK));
/*
** "seek and ye shall find"...praying never hurts either...
*/
pide = sba_search_bitmap(ioc, pages_needed);
if (pide >= (ioc->res_size << 3)) {
pide = sba_search_bitmap(ioc, pages_needed);
if (pide >= (ioc->res_size << 3))
panic(__FILE__ ": I/O MMU @ %lx is out of mapping resources\n", ioc->ioc_hpa);
}
#ifdef ASSERT_PDIR_SANITY
/* verify the first enable bit is clear */
if(0x00 != ((u8 *) ioc->pdir_base)[pide*sizeof(u64) + 7]) {
sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
}
#endif
DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
__FUNCTION__, size, pages_needed, pide,
(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
ioc->res_bitshift );
#ifdef CONFIG_PROC_FS
{
unsigned long itc_end = ia64_get_itc();
unsigned long tmp = itc_end - itc_start;
/* check for roll over */
itc_start = (itc_end < itc_start) ? -(tmp) : (tmp);
}
ioc->avg_search[ioc->avg_idx++] = itc_start;
ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
ioc->used_pages += pages_needed;
#endif
return (pide);
}
/**
* sba_free_range - unmark bits in IO PDIR resource bitmap
* @ioc: IO MMU structure which owns the pdir we are interested in.
* @iova: IO virtual address which was previously allocated.
* @size: number of bytes to create a mapping for
*
* clear bits in the ioc's resource map
*/
static SBA_INLINE void
sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
{
unsigned long iovp = SBA_IOVP(ioc, iova);
unsigned int pide = PDIR_INDEX(iovp);
unsigned int ridx = pide >> 3; /* convert bit to byte address */
unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
int bits_not_wanted = size >> IOVP_SHIFT;
/* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
unsigned long m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n",
__FUNCTION__, (uint) iova, size,
bits_not_wanted, m, pide, res_ptr, *res_ptr);
#ifdef CONFIG_PROC_FS
ioc->used_pages -= bits_not_wanted;
#endif
ASSERT(m != 0);
ASSERT(bits_not_wanted);
ASSERT((bits_not_wanted * IOVP_SIZE) <= DMA_CHUNK_SIZE);
ASSERT(bits_not_wanted <= BITS_PER_LONG);
ASSERT((*res_ptr & m) == m); /* verify same bits are set */
*res_ptr &= ~m;
}
/**************************************************************
*
* "Dynamic DMA Mapping" support (aka "Coherent I/O")
*
***************************************************************/
#define SBA_DMA_HINT(ioc, val) ((val) << (ioc)->hint_shift_pdir)
/**
* sba_io_pdir_entry - fill in one IO PDIR entry
* @pdir_ptr: pointer to IO PDIR entry
* @vba: Virtual CPU address of buffer to map
*
* SBA Mapping Routine
*
* Given a virtual address (vba, arg1) sba_io_pdir_entry()
* loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
* Each IO Pdir entry consists of 8 bytes as shown below
* (LSB == bit 0):
*
* 63 40 11 7 0
* +-+---------------------+----------------------------------+----+--------+
* |V| U | PPN[39:12] | U | FF |
* +-+---------------------+----------------------------------+----+--------+
*
* V == Valid Bit
* U == Unused
* PPN == Physical Page Number
*
* The physical address fields are filled with the results of virt_to_phys()
* on the vba.
*/
#if 1
#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL)
#else
void SBA_INLINE
sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
{
*pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL);
}
#endif
#ifdef ENABLE_MARK_CLEAN
/**
* Since DMA is i-cache coherent, any (complete) pages that were written via
* DMA can be marked as "clean" so that update_mmu_cache() doesn't have to
* flush them when they get mapped into an executable vm-area.
*/
static void
mark_clean (void *addr, size_t size)
{
unsigned long pg_addr, end;
pg_addr = PAGE_ALIGN((unsigned long) addr);
end = (unsigned long) addr + size;
while (pg_addr + PAGE_SIZE <= end) {
struct page *page = virt_to_page(pg_addr);
set_bit(PG_arch_1, &page->flags);
pg_addr += PAGE_SIZE;
}
}
#endif
/**
* sba_mark_invalid - invalidate one or more IO PDIR entries
* @ioc: IO MMU structure which owns the pdir we are interested in.
* @iova: IO Virtual Address mapped earlier
* @byte_cnt: number of bytes this mapping covers.
*
* Marking the IO PDIR entry(ies) as Invalid and invalidate
* corresponding IO TLB entry. The PCOM (Purge Command Register)
* is to purge stale entries in the IO TLB when unmapping entries.
*
* The PCOM register supports purging of multiple pages, with a minium
* of 1 page and a maximum of 2GB. Hardware requires the address be
* aligned to the size of the range being purged. The size of the range
* must be a power of 2. The "Cool perf optimization" in the
* allocation routine helps keep that true.
*/
static SBA_INLINE void
sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
{
u32 iovp = (u32) SBA_IOVP(ioc,iova);
int off = PDIR_INDEX(iovp);
/* Must be non-zero and rounded up */
ASSERT(byte_cnt > 0);
ASSERT(0 == (byte_cnt & ~IOVP_MASK));
#ifdef ASSERT_PDIR_SANITY
/* Assert first pdir entry is set */
if (!(ioc->pdir_base[off] >> 60)) {
sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp));
}
#endif
if (byte_cnt <= IOVP_SIZE)
{
ASSERT(off < ioc->pdir_size);
iovp |= IOVP_SHIFT; /* set "size" field for PCOM */
/*
** clear I/O PDIR entry "valid" bit
** Do NOT clear the rest - save it for debugging.
** We should only clear bits that have previously
** been enabled.
*/
ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
} else {
u32 t = get_order(byte_cnt) + PAGE_SHIFT;
iovp |= t;
ASSERT(t <= 31); /* 2GB! Max value of "size" field */
do {
/* verify this pdir entry is enabled */
ASSERT(ioc->pdir_base[off] >> 63);
/* clear I/O Pdir entry "valid" bit first */
ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
off++;
byte_cnt -= IOVP_SIZE;
} while (byte_cnt > 0);
}
WRITE_REG(iovp, ioc->ioc_hpa+IOC_PCOM);
}
/**
* sba_map_single - map one buffer and return IOVA for DMA
* @dev: instance of PCI owned by the driver that's asking.
* @addr: driver buffer to map.
* @size: number of bytes to map in driver buffer.
* @direction: R/W or both.
*
* See Documentation/DMA-mapping.txt
*/
dma_addr_t
sba_map_single(struct pci_dev *dev, void *addr, size_t size, int direction)
{
struct ioc *ioc;
unsigned long flags;
dma_addr_t iovp;
dma_addr_t offset;
u64 *pdir_start;
int pide;
#ifdef ALLOW_IOV_BYPASS
unsigned long pci_addr = virt_to_phys(addr);
#endif
ioc = GET_IOC(dev);
ASSERT(ioc);
#ifdef ALLOW_IOV_BYPASS
/*
** Check if the PCI device can DMA to ptr... if so, just return ptr
*/
if ((pci_addr & ~dev->dma_mask) == 0) {
/*
** Device is bit capable of DMA'ing to the buffer...
** just return the PCI address of ptr
*/
#ifdef CONFIG_PROC_FS
spin_lock_irqsave(&ioc->res_lock, flags);
ioc->msingle_bypass++;
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
dev->dma_mask, pci_addr);
return pci_addr;
}
#endif
ASSERT(size > 0);
ASSERT(size <= DMA_CHUNK_SIZE);
/* save offset bits */
offset = ((dma_addr_t) (long) addr) & ~IOVP_MASK;
/* round up to nearest IOVP_SIZE */
size = (size + offset + ~IOVP_MASK) & IOVP_MASK;
spin_lock_irqsave(&ioc->res_lock, flags);
#ifdef ASSERT_PDIR_SANITY
if (sba_check_pdir(ioc,"Check before sba_map_single()"))
panic("Sanity check failed");
#endif
#ifdef CONFIG_PROC_FS
ioc->msingle_calls++;
ioc->msingle_pages += size >> IOVP_SHIFT;
#endif
pide = sba_alloc_range(ioc, size);
iovp = (dma_addr_t) pide << IOVP_SHIFT;
DBG_RUN("%s() 0x%p -> 0x%lx\n",
__FUNCTION__, addr, (long) iovp | offset);
pdir_start = &(ioc->pdir_base[pide]);
while (size > 0) {
ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */
sba_io_pdir_entry(pdir_start, (unsigned long) addr);
DBG_RUN(" pdir 0x%p %lx\n", pdir_start, *pdir_start);
addr += IOVP_SIZE;
size -= IOVP_SIZE;
pdir_start++;
}
/* form complete address */
#ifdef ASSERT_PDIR_SANITY
sba_check_pdir(ioc,"Check after sba_map_single()");
#endif
spin_unlock_irqrestore(&ioc->res_lock, flags);
return SBA_IOVA(ioc, iovp, offset, DEFAULT_DMA_HINT_REG);
}
/**
* sba_unmap_single - unmap one IOVA and free resources
* @dev: instance of PCI owned by the driver that's asking.
* @iova: IOVA of driver buffer previously mapped.
* @size: number of bytes mapped in driver buffer.
* @direction: R/W or both.
*
* See Documentation/DMA-mapping.txt
*/
void sba_unmap_single(struct pci_dev *dev, dma_addr_t iova, size_t size,
int direction)
{
struct ioc *ioc;
#if DELAYED_RESOURCE_CNT > 0
struct sba_dma_pair *d;
#endif
unsigned long flags;
dma_addr_t offset;
ioc = GET_IOC(dev);
ASSERT(ioc);
#ifdef ALLOW_IOV_BYPASS
if ((iova & ioc->imask) != ioc->ibase) {
/*
** Address does not fall w/in IOVA, must be bypassing
*/
#ifdef CONFIG_PROC_FS
spin_lock_irqsave(&ioc->res_lock, flags);
ioc->usingle_bypass++;
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);
#ifdef ENABLE_MARK_CLEAN
if (direction == PCI_DMA_FROMDEVICE) {
mark_clean(phys_to_virt(iova), size);
}
#endif
return;
}
#endif
offset = iova & ~IOVP_MASK;
DBG_RUN("%s() iovp 0x%lx/%x\n",
__FUNCTION__, (long) iova, size);
iova ^= offset; /* clear offset bits */
size += offset;
size = ROUNDUP(size, IOVP_SIZE);
spin_lock_irqsave(&ioc->res_lock, flags);
#ifdef CONFIG_PROC_FS
ioc->usingle_calls++;
ioc->usingle_pages += size >> IOVP_SHIFT;
#endif
#if DELAYED_RESOURCE_CNT > 0
d = &(ioc->saved[ioc->saved_cnt]);
d->iova = iova;
d->size = size;
if (++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT) {
int cnt = ioc->saved_cnt;
while (cnt--) {
sba_mark_invalid(ioc, d->iova, d->size);
sba_free_range(ioc, d->iova, d->size);
d--;
}
ioc->saved_cnt = 0;
READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */
}
#else /* DELAYED_RESOURCE_CNT == 0 */
sba_mark_invalid(ioc, iova, size);
sba_free_range(ioc, iova, size);
READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */
#endif /* DELAYED_RESOURCE_CNT == 0 */
#ifdef ENABLE_MARK_CLEAN
if (direction == PCI_DMA_FROMDEVICE) {
u32 iovp = (u32) SBA_IOVP(ioc,iova);
int off = PDIR_INDEX(iovp);
void *addr;
if (size <= IOVP_SIZE) {
addr = phys_to_virt(ioc->pdir_base[off] &
~0xE000000000000FFFULL);
mark_clean(addr, size);
} else {
size_t byte_cnt = size;
do {
addr = phys_to_virt(ioc->pdir_base[off] &
~0xE000000000000FFFULL);
mark_clean(addr, min(byte_cnt, IOVP_SIZE));
off++;
byte_cnt -= IOVP_SIZE;
} while (byte_cnt > 0);
}
}
#endif
spin_unlock_irqrestore(&ioc->res_lock, flags);
/* XXX REVISIT for 2.5 Linux - need syncdma for zero-copy support.
** For Astro based systems this isn't a big deal WRT performance.
** As long as 2.4 kernels copyin/copyout data from/to userspace,
** we don't need the syncdma. The issue here is I/O MMU cachelines
** are *not* coherent in all cases. May be hwrev dependent.
** Need to investigate more.
asm volatile("syncdma");
*/
}
/**
* sba_alloc_consistent - allocate/map shared mem for DMA
* @hwdev: instance of PCI owned by the driver that's asking.
* @size: number of bytes mapped in driver buffer.
* @dma_handle: IOVA of new buffer.
*
* See Documentation/DMA-mapping.txt
*/
void *
sba_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle)
{
void *ret;
if (!hwdev) {
/* only support PCI */
*dma_handle = 0;
return 0;
}
ret = (void *) __get_free_pages(GFP_ATOMIC, get_order(size));
if (ret) {
memset(ret, 0, size);
*dma_handle = sba_map_single(hwdev, ret, size, 0);
}
return ret;
}
/**
* sba_free_consistent - free/unmap shared mem for DMA
* @hwdev: instance of PCI owned by the driver that's asking.
* @size: number of bytes mapped in driver buffer.
* @vaddr: virtual address IOVA of "consistent" buffer.
* @dma_handler: IO virtual address of "consistent" buffer.
*
* See Documentation/DMA-mapping.txt
*/
void sba_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr,
dma_addr_t dma_handle)
{
sba_unmap_single(hwdev, dma_handle, size, 0);
free_pages((unsigned long) vaddr, get_order(size));
}
/*
** Since 0 is a valid pdir_base index value, can't use that
** to determine if a value is valid or not. Use a flag to indicate
** the SG list entry contains a valid pdir index.
*/
#define PIDE_FLAG 0x1UL
#ifdef DEBUG_LARGE_SG_ENTRIES
int dump_run_sg = 0;
#endif
/**
* sba_fill_pdir - write allocated SG entries into IO PDIR
* @ioc: IO MMU structure which owns the pdir we are interested in.
* @startsg: list of IOVA/size pairs
* @nents: number of entries in startsg list
*
* Take preprocessed SG list and write corresponding entries
* in the IO PDIR.
*/
static SBA_INLINE int
sba_fill_pdir(
struct ioc *ioc,
struct scatterlist *startsg,
int nents)
{
struct scatterlist *dma_sg = startsg; /* pointer to current DMA */
int n_mappings = 0;
u64 *pdirp = 0;
unsigned long dma_offset = 0;
dma_sg--;
while (nents-- > 0) {
int cnt = sba_sg_len(startsg);
sba_sg_len(startsg) = 0;
#ifdef DEBUG_LARGE_SG_ENTRIES
if (dump_run_sg)
printk(" %2d : %08lx/%05x %p\n",
nents,
(unsigned long) sba_sg_iova(startsg), cnt,
sba_sg_buffer(startsg)
);
#else
DBG_RUN_SG(" %d : %08lx/%05x %p\n",
nents,
(unsigned long) sba_sg_iova(startsg), cnt,
sba_sg_buffer(startsg)
);
#endif
/*
** Look for the start of a new DMA stream
*/
if ((u64)sba_sg_iova(startsg) & PIDE_FLAG) {
u32 pide = (u64)sba_sg_iova(startsg) & ~PIDE_FLAG;
dma_offset = (unsigned long) pide & ~IOVP_MASK;
sba_sg_iova(startsg) = 0;
dma_sg++;
sba_sg_iova(dma_sg) = (char *)(pide | ioc->ibase);
pdirp = &(ioc->pdir_base[pide >> IOVP_SHIFT]);
n_mappings++;
}
/*
** Look for a VCONTIG chunk
*/
if (cnt) {
unsigned long vaddr = (unsigned long) sba_sg_buffer(startsg);
ASSERT(pdirp);
/* Since multiple Vcontig blocks could make up
** one DMA stream, *add* cnt to dma_len.
*/
sba_sg_len(dma_sg) += cnt;
cnt += dma_offset;
dma_offset=0; /* only want offset on first chunk */
cnt = ROUNDUP(cnt, IOVP_SIZE);
#ifdef CONFIG_PROC_FS
ioc->msg_pages += cnt >> IOVP_SHIFT;
#endif
do {
sba_io_pdir_entry(pdirp, vaddr);
vaddr += IOVP_SIZE;
cnt -= IOVP_SIZE;
pdirp++;
} while (cnt > 0);
}
startsg++;
}
#ifdef DEBUG_LARGE_SG_ENTRIES
dump_run_sg = 0;
#endif
return(n_mappings);
}
/*
** Two address ranges are DMA contiguous *iff* "end of prev" and
** "start of next" are both on a page boundry.
**
** (shift left is a quick trick to mask off upper bits)
*/
#define DMA_CONTIG(__X, __Y) \
(((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - PAGE_SHIFT)) == 0UL)
/**
* sba_coalesce_chunks - preprocess the SG list
* @ioc: IO MMU structure which owns the pdir we are interested in.
* @startsg: list of IOVA/size pairs
* @nents: number of entries in startsg list
*
* First pass is to walk the SG list and determine where the breaks are
* in the DMA stream. Allocates PDIR entries but does not fill them.
* Returns the number of DMA chunks.
*
* Doing the fill seperate from the coalescing/allocation keeps the
* code simpler. Future enhancement could make one pass through
* the sglist do both.
*/
static SBA_INLINE int
sba_coalesce_chunks( struct ioc *ioc,
struct scatterlist *startsg,
int nents)
{
struct scatterlist *vcontig_sg; /* VCONTIG chunk head */
unsigned long vcontig_len; /* len of VCONTIG chunk */
unsigned long vcontig_end;
struct scatterlist *dma_sg; /* next DMA stream head */
unsigned long dma_offset, dma_len; /* start/len of DMA stream */
int n_mappings = 0;
while (nents > 0) {
unsigned long vaddr = (unsigned long) (startsg->address);
/*
** Prepare for first/next DMA stream
*/
dma_sg = vcontig_sg = startsg;
dma_len = vcontig_len = vcontig_end = sba_sg_len(startsg);
vcontig_end += vaddr;
dma_offset = vaddr & ~IOVP_MASK;
/* PARANOID: clear entries */
sba_sg_buffer(startsg) = sba_sg_iova(startsg);
sba_sg_iova(startsg) = 0;
sba_sg_len(startsg) = 0;
/*
** This loop terminates one iteration "early" since
** it's always looking one "ahead".
*/
while (--nents > 0) {
unsigned long vaddr; /* tmp */
startsg++;
/* catch brokenness in SCSI layer */
ASSERT(startsg->length <= DMA_CHUNK_SIZE);
/*
** First make sure current dma stream won't
** exceed DMA_CHUNK_SIZE if we coalesce the
** next entry.
*/
if (((dma_len + dma_offset + startsg->length + ~IOVP_MASK) & IOVP_MASK) > DMA_CHUNK_SIZE)
break;
/*
** Then look for virtually contiguous blocks.
**
** append the next transaction?
*/
vaddr = (unsigned long) sba_sg_iova(startsg);
if (vcontig_end == vaddr)
{
vcontig_len += sba_sg_len(startsg);
vcontig_end += sba_sg_len(startsg);
dma_len += sba_sg_len(startsg);
sba_sg_buffer(startsg) = (char *)vaddr;
sba_sg_iova(startsg) = 0;
sba_sg_len(startsg) = 0;
continue;
}
#ifdef DEBUG_LARGE_SG_ENTRIES
dump_run_sg = (vcontig_len > IOVP_SIZE);
#endif
/*
** Not virtually contigous.
** Terminate prev chunk.
** Start a new chunk.
**
** Once we start a new VCONTIG chunk, dma_offset
** can't change. And we need the offset from the first
** chunk - not the last one. Ergo Successive chunks
** must start on page boundaries and dove tail
** with it's predecessor.
*/
sba_sg_len(vcontig_sg) = vcontig_len;
vcontig_sg = startsg;
vcontig_len = sba_sg_len(startsg);
/*
** 3) do the entries end/start on page boundaries?
** Don't update vcontig_end until we've checked.
*/
if (DMA_CONTIG(vcontig_end, vaddr))
{
vcontig_end = vcontig_len + vaddr;
dma_len += vcontig_len;
sba_sg_buffer(startsg) = (char *)vaddr;
sba_sg_iova(startsg) = 0;
continue;
} else {
break;
}
}
/*
** End of DMA Stream
** Terminate last VCONTIG block.
** Allocate space for DMA stream.
*/
sba_sg_len(vcontig_sg) = vcontig_len;
dma_len = (dma_len + dma_offset + ~IOVP_MASK) & IOVP_MASK;
ASSERT(dma_len <= DMA_CHUNK_SIZE);
sba_sg_iova(dma_sg) = (char *) (PIDE_FLAG
| (sba_alloc_range(ioc, dma_len) << IOVP_SHIFT)
| dma_offset);
n_mappings++;
}
return n_mappings;
}
/**
* sba_map_sg - map Scatter/Gather list
* @dev: instance of PCI owned by the driver that's asking.
* @sglist: array of buffer/length pairs
* @nents: number of entries in list
* @direction: R/W or both.
*
* See Documentation/DMA-mapping.txt
*/
int sba_map_sg(struct pci_dev *dev, struct scatterlist *sglist, int nents,
int direction)
{
struct ioc *ioc;
int coalesced, filled = 0;
unsigned long flags;
#ifdef ALLOW_IOV_BYPASS
struct scatterlist *sg;
#endif
DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents);
ioc = GET_IOC(dev);
ASSERT(ioc);
#ifdef ALLOW_IOV_BYPASS
if (dev->dma_mask >= ioc->dma_mask) {
for (sg = sglist ; filled < nents ; filled++, sg++){
sba_sg_buffer(sg) = sba_sg_iova(sg);
sba_sg_iova(sg) = (char *)virt_to_phys(sba_sg_buffer(sg));
}
#ifdef CONFIG_PROC_FS
spin_lock_irqsave(&ioc->res_lock, flags);
ioc->msg_bypass++;
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
return filled;
}
#endif
/* Fast path single entry scatterlists. */
if (nents == 1) {
sba_sg_buffer(sglist) = sba_sg_iova(sglist);
sba_sg_iova(sglist) = (char *)sba_map_single(dev,
sba_sg_buffer(sglist),
sba_sg_len(sglist), direction);
#ifdef CONFIG_PROC_FS
/*
** Should probably do some stats counting, but trying to
** be precise quickly starts wasting CPU time.
*/
#endif
return 1;
}
spin_lock_irqsave(&ioc->res_lock, flags);
#ifdef ASSERT_PDIR_SANITY
if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
{
sba_dump_sg(ioc, sglist, nents);
panic("Check before sba_map_sg()");
}
#endif
#ifdef CONFIG_PROC_FS
ioc->msg_calls++;
#endif
/*
** First coalesce the chunks and allocate I/O pdir space
**
** If this is one DMA stream, we can properly map using the
** correct virtual address associated with each DMA page.
** w/o this association, we wouldn't have coherent DMA!
** Access to the virtual address is what forces a two pass algorithm.
*/
coalesced = sba_coalesce_chunks(ioc, sglist, nents);
/*
** Program the I/O Pdir
**
** map the virtual addresses to the I/O Pdir
** o dma_address will contain the pdir index
** o dma_len will contain the number of bytes to map
** o address contains the virtual address.
*/
filled = sba_fill_pdir(ioc, sglist, nents);
#ifdef ASSERT_PDIR_SANITY
if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
{
sba_dump_sg(ioc, sglist, nents);
panic("Check after sba_map_sg()\n");
}
#endif
spin_unlock_irqrestore(&ioc->res_lock, flags);
ASSERT(coalesced == filled);
DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled);
return filled;
}
/**
* sba_unmap_sg - unmap Scatter/Gather list
* @dev: instance of PCI owned by the driver that's asking.
* @sglist: array of buffer/length pairs
* @nents: number of entries in list
* @direction: R/W or both.
*
* See Documentation/DMA-mapping.txt
*/
void sba_unmap_sg(struct pci_dev *dev, struct scatterlist *sglist, int nents,
int direction)
{
struct ioc *ioc;
#ifdef ASSERT_PDIR_SANITY
unsigned long flags;
#endif
DBG_RUN_SG("%s() START %d entries, %p,%x\n",
__FUNCTION__, nents, sba_sg_buffer(sglist), sglist->length);
ioc = GET_IOC(dev);
ASSERT(ioc);
#ifdef CONFIG_PROC_FS
ioc->usg_calls++;
#endif
#ifdef ASSERT_PDIR_SANITY
spin_lock_irqsave(&ioc->res_lock, flags);
sba_check_pdir(ioc,"Check before sba_unmap_sg()");
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
while (sba_sg_len(sglist) && nents--) {
sba_unmap_single(dev, (dma_addr_t)sba_sg_iova(sglist),
sba_sg_len(sglist), direction);
#ifdef CONFIG_PROC_FS
/*
** This leaves inconsistent data in the stats, but we can't
** tell which sg lists were mapped by map_single and which
** were coalesced to a single entry. The stats are fun,
** but speed is more important.
*/
ioc->usg_pages += (((u64)sba_sg_iova(sglist) & ~IOVP_MASK) + sba_sg_len(sglist) + IOVP_SIZE - 1) >> PAGE_SHIFT;
#endif
++sglist;
}
DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__, nents);
#ifdef ASSERT_PDIR_SANITY
spin_lock_irqsave(&ioc->res_lock, flags);
sba_check_pdir(ioc,"Check after sba_unmap_sg()");
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
}
unsigned long
sba_dma_address (struct scatterlist *sg)
{
return ((unsigned long)sba_sg_iova(sg));
}
/**************************************************************
*
* Initialization and claim
*
***************************************************************/
static void
sba_ioc_init(struct sba_device *sba_dev, struct ioc *ioc, int ioc_num)
{
u32 iova_space_size, iova_space_mask;
void * pdir_base;
int pdir_size, iov_order, tcnfg;
/*
** Firmware programs the maximum IOV space size into the imask reg
*/
iova_space_size = ~(READ_REG(ioc->ioc_hpa + IOC_IMASK) & 0xFFFFFFFFUL) + 1;
#ifdef CONFIG_IA64_HP_PROTO
if (!iova_space_size)
iova_space_size = GB(1);
#endif
/*
** iov_order is always based on a 1GB IOVA space since we want to
** turn on the other half for AGP GART.
*/
iov_order = get_order(iova_space_size >> (IOVP_SHIFT-PAGE_SHIFT));
ioc->pdir_size = pdir_size = (iova_space_size/IOVP_SIZE) * sizeof(u64);
DBG_INIT("%s() hpa 0x%lx IOV %dMB (%d bits) PDIR size 0x%0x\n",
__FUNCTION__, ioc->ioc_hpa, iova_space_size>>20,
iov_order + PAGE_SHIFT, ioc->pdir_size);
/* FIXME : DMA HINTs not used */
ioc->hint_shift_pdir = iov_order + PAGE_SHIFT;
ioc->hint_mask_pdir = ~(0x3 << (iov_order + PAGE_SHIFT));
ioc->pdir_base =
pdir_base = (void *) __get_free_pages(GFP_KERNEL, get_order(pdir_size));
if (NULL == pdir_base)
{
panic(__FILE__ ":%s() could not allocate I/O Page Table\n", __FUNCTION__);
}
memset(pdir_base, 0, pdir_size);
DBG_INIT("%s() pdir %p size %x hint_shift_pdir %x hint_mask_pdir %lx\n",
__FUNCTION__, pdir_base, pdir_size,
ioc->hint_shift_pdir, ioc->hint_mask_pdir);
ASSERT((((unsigned long) pdir_base) & PAGE_MASK) == (unsigned long) pdir_base);
WRITE_REG(virt_to_phys(pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE);
DBG_INIT(" base %p\n", pdir_base);
/* build IMASK for IOC and Elroy */
iova_space_mask = 0xffffffff;
iova_space_mask <<= (iov_order + PAGE_SHIFT);
#ifdef CONFIG_IA64_HP_PROTO
/*
** REVISIT - this is a kludge, but we won't be supporting anything but
** zx1 2.0 or greater for real. When fw is in shape, ibase will
** be preprogrammed w/ the IOVA hole base and imask will give us
** the size.
*/
if ((sba_dev->hw_rev & 0xFF) < 0x20) {
DBG_INIT("%s() Found SBA rev < 2.0, setting IOVA base to 0. This device will not be supported in the future.\n", __FUNCTION__);
ioc->ibase = 0x0;
} else
#endif
ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & 0xFFFFFFFEUL;
ioc->imask = iova_space_mask; /* save it */
DBG_INIT("%s() IOV base 0x%lx mask 0x%0lx\n",
__FUNCTION__, ioc->ibase, ioc->imask);
/*
** FIXME: Hint registers are programmed with default hint
** values during boot, so hints should be sane even if we
** can't reprogram them the way drivers want.
*/
WRITE_REG(ioc->imask, ioc->ioc_hpa+IOC_IMASK);
/*
** Setting the upper bits makes checking for bypass addresses
** a little faster later on.
*/
ioc->imask |= 0xFFFFFFFF00000000UL;
/* Set I/O PDIR Page size to system page size */
switch (PAGE_SHIFT) {
case 12: /* 4K */
tcnfg = 0;
break;
case 13: /* 8K */
tcnfg = 1;
break;
case 14: /* 16K */
tcnfg = 2;
break;
case 16: /* 64K */
tcnfg = 3;
break;
}
WRITE_REG(tcnfg, ioc->ioc_hpa+IOC_TCNFG);
/*
** Program the IOC's ibase and enable IOVA translation
** Bit zero == enable bit.
*/
WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa+IOC_IBASE);
/*
** Clear I/O TLB of any possible entries.
** (Yes. This is a bit paranoid...but so what)
*/
WRITE_REG(0 | 31, ioc->ioc_hpa+IOC_PCOM);
/*
** If an AGP device is present, only use half of the IOV space
** for PCI DMA. Unfortunately we can't know ahead of time
** whether GART support will actually be used, for now we
** can just key on an AGP device found in the system.
** We program the next pdir index after we stop w/ a key for
** the GART code to handshake on.
*/
if (SBA_GET_AGP(sba_dev)) {
DBG_INIT("%s() AGP Device found, reserving 512MB for GART support\n", __FUNCTION__);
ioc->pdir_size /= 2;
((u64 *)pdir_base)[PDIR_INDEX(iova_space_size/2)] = 0x0000badbadc0ffeeULL;
}
DBG_INIT("%s() DONE\n", __FUNCTION__);
}
/**************************************************************************
**
** SBA initialization code (HW and SW)
**
** o identify SBA chip itself
** o FIXME: initialize DMA hints for reasonable defaults
**
**************************************************************************/
static void
sba_hw_init(struct sba_device *sba_dev)
{
int i;
int num_ioc;
u64 dma_mask;
u32 func_id;
/*
** Identify the SBA so we can set the dma_mask. We can make a virtual
** dma_mask of the memory subsystem such that devices not implmenting
** a full 64bit mask might still be able to bypass efficiently.
*/
func_id = READ_REG(sba_dev->sba_hpa + SBA_FUNC_ID);
if (func_id == ZX1_FUNC_ID_VALUE) {
dma_mask = 0xFFFFFFFFFFUL;
} else {
dma_mask = 0xFFFFFFFFFFFFFFFFUL;
}
DBG_INIT("%s(): ioc->dma_mask == 0x%lx\n", __FUNCTION__, dma_mask);
/*
** Leaving in the multiple ioc code from parisc for the future,
** currently there are no muli-ioc mckinley sbas
*/
sba_dev->ioc[0].ioc_hpa = SBA_IOC_OFFSET;
num_ioc = 1;
sba_dev->num_ioc = num_ioc;
for (i = 0; i < num_ioc; i++) {
sba_dev->ioc[i].dma_mask = dma_mask;
sba_dev->ioc[i].ioc_hpa += sba_dev->sba_hpa;
sba_ioc_init(sba_dev, &(sba_dev->ioc[i]), i);
}
}
static void
sba_common_init(struct sba_device *sba_dev)
{
int i;
/* add this one to the head of the list (order doesn't matter)
** This will be useful for debugging - especially if we get coredumps
*/
sba_dev->next = sba_list;
sba_list = sba_dev;
sba_count++;
for(i=0; i< sba_dev->num_ioc; i++) {
int res_size;
/* resource map size dictated by pdir_size */
res_size = sba_dev->ioc[i].pdir_size/sizeof(u64); /* entries */
res_size >>= 3; /* convert bit count to byte count */
DBG_INIT("%s() res_size 0x%x\n",
__FUNCTION__, res_size);
sba_dev->ioc[i].res_size = res_size;
sba_dev->ioc[i].res_map = (char *) __get_free_pages(GFP_KERNEL, get_order(res_size));
if (NULL == sba_dev->ioc[i].res_map)
{
panic(__FILE__ ":%s() could not allocate resource map\n", __FUNCTION__ );
}
memset(sba_dev->ioc[i].res_map, 0, res_size);
/* next available IOVP - circular search */
if ((sba_dev->hw_rev & 0xFF) >= 0x20) {
sba_dev->ioc[i].res_hint = (unsigned long *)
sba_dev->ioc[i].res_map;
} else {
u64 reserved_iov;
/* Yet another 1.x hack */
printk("zx1 1.x: Starting resource hint offset into IOV space to avoid initial zero value IOVA\n");
sba_dev->ioc[i].res_hint = (unsigned long *)
&(sba_dev->ioc[i].res_map[L1_CACHE_BYTES]);
sba_dev->ioc[i].res_map[0] = 0x1;
sba_dev->ioc[i].pdir_base[0] = 0x8000badbadc0ffeeULL;
for (reserved_iov = 0xA0000 ; reserved_iov < 0xC0000 ; reserved_iov += IOVP_SIZE) {
u64 *res_ptr = sba_dev->ioc[i].res_map;
int index = PDIR_INDEX(reserved_iov);
int res_word;
u64 mask;
res_word = (int)(index / BITS_PER_LONG);
mask = 0x1UL << (index - (res_word * BITS_PER_LONG));
res_ptr[res_word] |= mask;
sba_dev->ioc[i].pdir_base[PDIR_INDEX(reserved_iov)] = (0x80000000000000FFULL | reserved_iov);
}
}
#ifdef ASSERT_PDIR_SANITY
/* Mark first bit busy - ie no IOVA 0 */
sba_dev->ioc[i].res_map[0] = 0x1;
sba_dev->ioc[i].pdir_base[0] = 0x8000badbadc0ffeeULL;
#endif
DBG_INIT("%s() %d res_map %x %p\n", __FUNCTION__,
i, res_size, (void *)sba_dev->ioc[i].res_map);
}
sba_dev->sba_lock = SPIN_LOCK_UNLOCKED;
}
#ifdef CONFIG_PROC_FS
static int sba_proc_info(char *buf, char **start, off_t offset, int len)
{
struct sba_device *sba_dev = sba_list;
struct ioc *ioc = &sba_dev->ioc[0]; /* FIXME: Multi-IOC support! */
int total_pages = (int) (ioc->res_size << 3); /* 8 bits per byte */
unsigned long i = 0, avg = 0, min, max;
sprintf(buf, "%s rev %d.%d\n",
"Hewlett Packard zx1 SBA",
((sba_dev->hw_rev >> 4) & 0xF),
(sba_dev->hw_rev & 0xF)
);
sprintf(buf, "%sIO PDIR size : %d bytes (%d entries)\n",
buf,
(int) ((ioc->res_size << 3) * sizeof(u64)), /* 8 bits/byte */
total_pages);
sprintf(buf, "%sIO PDIR entries : %ld free %ld used (%d%%)\n", buf,
total_pages - ioc->used_pages, ioc->used_pages,
(int) (ioc->used_pages * 100 / total_pages));
sprintf(buf, "%sResource bitmap : %d bytes (%d pages)\n",
buf, ioc->res_size, ioc->res_size << 3); /* 8 bits per byte */
min = max = ioc->avg_search[0];
for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
avg += ioc->avg_search[i];
if (ioc->avg_search[i] > max) max = ioc->avg_search[i];
if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
}
avg /= SBA_SEARCH_SAMPLE;
sprintf(buf, "%s Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
buf, min, avg, max);
sprintf(buf, "%spci_map_single(): %12ld calls %12ld pages (avg %d/1000)\n",
buf, ioc->msingle_calls, ioc->msingle_pages,
(int) ((ioc->msingle_pages * 1000)/ioc->msingle_calls));
#ifdef ALLOW_IOV_BYPASS
sprintf(buf, "%spci_map_single(): %12ld bypasses\n",
buf, ioc->msingle_bypass);
#endif
sprintf(buf, "%spci_unmap_single: %12ld calls %12ld pages (avg %d/1000)\n",
buf, ioc->usingle_calls, ioc->usingle_pages,
(int) ((ioc->usingle_pages * 1000)/ioc->usingle_calls));
#ifdef ALLOW_IOV_BYPASS
sprintf(buf, "%spci_unmap_single: %12ld bypasses\n",
buf, ioc->usingle_bypass);
#endif
sprintf(buf, "%spci_map_sg() : %12ld calls %12ld pages (avg %d/1000)\n",
buf, ioc->msg_calls, ioc->msg_pages,
(int) ((ioc->msg_pages * 1000)/ioc->msg_calls));
#ifdef ALLOW_IOV_BYPASS
sprintf(buf, "%spci_map_sg() : %12ld bypasses\n",
buf, ioc->msg_bypass);
#endif
sprintf(buf, "%spci_unmap_sg() : %12ld calls %12ld pages (avg %d/1000)\n",
buf, ioc->usg_calls, ioc->usg_pages,
(int) ((ioc->usg_pages * 1000)/ioc->usg_calls));
return strlen(buf);
}
static int
sba_resource_map(char *buf, char **start, off_t offset, int len)
{
struct ioc *ioc = sba_list->ioc; /* FIXME: Multi-IOC support! */
unsigned int *res_ptr = (unsigned int *)ioc->res_map;
int i;
buf[0] = '\0';
for(i = 0; i < (ioc->res_size / sizeof(unsigned int)); ++i, ++res_ptr) {
if ((i & 7) == 0)
strcat(buf,"\n ");
sprintf(buf, "%s %08x", buf, *res_ptr);
}
strcat(buf, "\n");
return strlen(buf);
}
#endif
/*
** Determine if sba should claim this chip (return 0) or not (return 1).
** If so, initialize the chip and tell other partners in crime they
** have work to do.
*/
void __init sba_init(void)
{
struct sba_device *sba_dev;
u32 func_id, hw_rev;
u32 *func_offset = NULL;
int i, agp_found = 0;
static char sba_rev[6];
struct pci_dev *device = NULL;
u64 hpa = 0;
if (!(device = pci_find_device(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_ZX1_SBA, NULL)))
return;
for (i = 0; i < PCI_NUM_RESOURCES; i++) {
if (pci_resource_flags(device, i) == IORESOURCE_MEM) {
hpa = ioremap(pci_resource_start(device, i),
pci_resource_len(device, i));
break;
}
}
func_id = READ_REG(hpa + SBA_FUNC_ID);
if (func_id == ZX1_FUNC_ID_VALUE) {
(void)strcpy(sba_rev, "zx1");
func_offset = zx1_func_offsets;
} else {
return;
}
/* Read HW Rev First */
hw_rev = READ_REG(hpa + SBA_FCLASS) & 0xFFUL;
/*
* Not all revision registers of the chipset are updated on every
* turn. Must scan through all functions looking for the highest rev
*/
if (func_offset) {
for (i = 0 ; func_offset[i] != -1 ; i++) {
u32 func_rev;
func_rev = READ_REG(hpa + SBA_FCLASS + func_offset[i]) & 0xFFUL;
DBG_INIT("%s() func offset: 0x%x rev: 0x%x\n",
__FUNCTION__, func_offset[i], func_rev);
if (func_rev > hw_rev)
hw_rev = func_rev;
}
}
printk(KERN_INFO "%s found %s %d.%d at %s, HPA 0x%lx\n", DRIVER_NAME,
sba_rev, ((hw_rev >> 4) & 0xF), (hw_rev & 0xF),
device->slot_name, hpa);
if ((hw_rev & 0xFF) < 0x20) {
printk(KERN_INFO "%s WARNING rev 2.0 or greater will be required for IO MMU support in the future\n", DRIVER_NAME);
#ifndef CONFIG_IA64_HP_PROTO
panic("%s: CONFIG_IA64_HP_PROTO MUST be enabled to support SBA rev less than 2.0", DRIVER_NAME);
#endif
}
sba_dev = kmalloc(sizeof(struct sba_device), GFP_KERNEL);
if (NULL == sba_dev) {
printk(KERN_ERR DRIVER_NAME " - couldn't alloc sba_device\n");
return;
}
memset(sba_dev, 0, sizeof(struct sba_device));
for(i=0; i<MAX_IOC; i++)
spin_lock_init(&(sba_dev->ioc[i].res_lock));
sba_dev->hw_rev = hw_rev;
sba_dev->sba_hpa = hpa;
/* /*
* We need to check for an AGP device, if we find one, then only * We need to check for an AGP device, if we find one, then only
......
...@@ -42,7 +42,7 @@ struct fake_pci_dev { ...@@ -42,7 +42,7 @@ struct fake_pci_dev {
static struct fake_pci_dev *fake_pci_head, **fake_pci_tail = &fake_pci_head; static struct fake_pci_dev *fake_pci_head, **fake_pci_tail = &fake_pci_head;
static struct pci_ops orig_pci_ops; static struct pci_ops *orig_pci_ops;
static inline struct fake_pci_dev * static inline struct fake_pci_dev *
fake_pci_find_slot(unsigned char bus, unsigned int devfn) fake_pci_find_slot(unsigned char bus, unsigned int devfn)
...@@ -77,7 +77,7 @@ static int hp_cfg_read##sz (struct pci_dev *dev, int where, u##bits *value) \ ...@@ -77,7 +77,7 @@ static int hp_cfg_read##sz (struct pci_dev *dev, int where, u##bits *value) \
{ \ { \
struct fake_pci_dev *fake_dev; \ struct fake_pci_dev *fake_dev; \
if (!(fake_dev = fake_pci_find_slot(dev->bus->number, dev->devfn))) \ if (!(fake_dev = fake_pci_find_slot(dev->bus->number, dev->devfn))) \
return orig_pci_ops.name(dev, where, value); \ return orig_pci_ops->name(dev, where, value); \
\ \
switch (where) { \ switch (where) { \
case PCI_COMMAND: \ case PCI_COMMAND: \
...@@ -105,7 +105,7 @@ static int hp_cfg_write##sz (struct pci_dev *dev, int where, u##bits value) \ ...@@ -105,7 +105,7 @@ static int hp_cfg_write##sz (struct pci_dev *dev, int where, u##bits value) \
{ \ { \
struct fake_pci_dev *fake_dev; \ struct fake_pci_dev *fake_dev; \
if (!(fake_dev = fake_pci_find_slot(dev->bus->number, dev->devfn))) \ if (!(fake_dev = fake_pci_find_slot(dev->bus->number, dev->devfn))) \
return orig_pci_ops.name(dev, where, value); \ return orig_pci_ops->name(dev, where, value); \
\ \
switch (where) { \ switch (where) { \
case PCI_BASE_ADDRESS_0: \ case PCI_BASE_ADDRESS_0: \
...@@ -295,7 +295,7 @@ hpzx1_lba_probe(acpi_handle obj, u32 depth, void *context, void **ret) ...@@ -295,7 +295,7 @@ hpzx1_lba_probe(acpi_handle obj, u32 depth, void *context, void **ret)
if (status != AE_OK) if (status != AE_OK)
return status; return status;
status = acpi_cf_evaluate_method(obj, METHOD_NAME__BBN, &busnum); status = acpi_evaluate_integer(obj, METHOD_NAME__BBN, NULL, &busnum);
if (ACPI_FAILURE(status)) { if (ACPI_FAILURE(status)) {
printk(KERN_ERR PFX "evaluate _BBN fail=0x%x\n", status); printk(KERN_ERR PFX "evaluate _BBN fail=0x%x\n", status);
busnum = 0; // no _BBN; stick it on bus 0 busnum = 0; // no _BBN; stick it on bus 0
...@@ -313,7 +313,7 @@ hpzx1_lba_probe(acpi_handle obj, u32 depth, void *context, void **ret) ...@@ -313,7 +313,7 @@ hpzx1_lba_probe(acpi_handle obj, u32 depth, void *context, void **ret)
static void static void
hpzx1_acpi_dev_init(void) hpzx1_acpi_dev_init(void)
{ {
extern struct pci_ops pci_conf; extern struct pci_ops *pci_root_ops;
/* /*
* Make fake PCI devices for the following hardware in the * Make fake PCI devices for the following hardware in the
...@@ -383,8 +383,8 @@ hpzx1_acpi_dev_init(void) ...@@ -383,8 +383,8 @@ hpzx1_acpi_dev_init(void)
/* /*
* Replace PCI ops, but only if we made fake devices. * Replace PCI ops, but only if we made fake devices.
*/ */
orig_pci_ops = pci_conf; orig_pci_ops = pci_root_ops;
pci_conf = hp_pci_conf; pci_root_ops = &hp_pci_conf;
} }
extern void sba_init(void); extern void sba_init(void);
......
...@@ -56,18 +56,43 @@ asm (".weak iosapic_version"); ...@@ -56,18 +56,43 @@ asm (".weak iosapic_version");
void (*pm_idle) (void); void (*pm_idle) (void);
void (*pm_power_off) (void); void (*pm_power_off) (void);
/*
* TBD: Should go away once we have an ACPI parser.
*/
const char * const char *
acpi_get_sysname (void) acpi_get_sysname (void)
{ {
#ifdef CONFIG_IA64_GENERIC #ifdef CONFIG_IA64_GENERIC
return "hpsim"; unsigned long rsdp_phys = 0;
struct acpi20_table_rsdp *rsdp;
struct acpi_table_xsdt *xsdt;
struct acpi_table_header *hdr;
if ((0 != acpi_find_rsdp(&rsdp_phys)) || !rsdp_phys) {
printk("ACPI 2.0 RSDP not found, default to \"dig\"\n");
return "dig";
}
rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
printk("ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
return "dig";
}
xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
hdr = &xsdt->header;
if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
printk("ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
return "dig";
}
if (!strcmp(hdr->oem_id, "HP")) {
return "hpzx1";
}
return "dig";
#else #else
# if defined (CONFIG_IA64_HP_SIM) # if defined (CONFIG_IA64_HP_SIM)
return "hpsim"; return "hpsim";
# elif defined (CONFIG_IA64_HP_ZX1)
return "hpzx1";
# elif defined (CONFIG_IA64_SGI_SN1) # elif defined (CONFIG_IA64_SGI_SN1)
return "sn1"; return "sn1";
# elif defined (CONFIG_IA64_SGI_SN2) # elif defined (CONFIG_IA64_SGI_SN2)
...@@ -80,6 +105,69 @@ acpi_get_sysname (void) ...@@ -80,6 +105,69 @@ acpi_get_sysname (void)
#endif #endif
} }
#ifdef CONFIG_ACPI
/**
* acpi_get_crs - Return the current resource settings for a device
* obj: A handle for this device
* buf: A buffer to be populated by this call.
*
* Pass a valid handle, typically obtained by walking the namespace and a
* pointer to an allocated buffer, and this function will fill in the buffer
* with a list of acpi_resource structures.
*/
acpi_status
acpi_get_crs (acpi_handle obj, acpi_buffer *buf)
{
acpi_status result;
buf->length = 0;
buf->pointer = NULL;
result = acpi_get_current_resources(obj, buf);
if (result != AE_BUFFER_OVERFLOW)
return result;
buf->pointer = kmalloc(buf->length, GFP_KERNEL);
if (!buf->pointer)
return -ENOMEM;
result = acpi_get_current_resources(obj, buf);
return result;
}
acpi_resource *
acpi_get_crs_next (acpi_buffer *buf, int *offset)
{
acpi_resource *res;
if (*offset >= buf->length)
return NULL;
res = buf->pointer + *offset;
*offset += res->length;
return res;
}
acpi_resource_data *
acpi_get_crs_type (acpi_buffer *buf, int *offset, int type)
{
for (;;) {
acpi_resource *res = acpi_get_crs_next(buf, offset);
if (!res)
return NULL;
if (res->id == type)
return &res->data;
}
}
void
acpi_dispose_crs (acpi_buffer *buf)
{
kfree(buf->pointer);
}
#endif /* CONFIG_ACPI */
#ifdef CONFIG_ACPI_BOOT #ifdef CONFIG_ACPI_BOOT
#define ACPI_MAX_PLATFORM_IRQS 256 #define ACPI_MAX_PLATFORM_IRQS 256
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET # define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET
# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET # define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET
# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET # define PR_OFF IA64_SIGCONTEXT_PR_OFFSET
# define RP_OFF IA64_SIGCONTEXT_B0_OFFSET # define RP_OFF IA64_SIGCONTEXT_IP_OFFSET
# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET # define SP_OFF IA64_SIGCONTEXT_R12_OFFSET
# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET # define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET
# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET # define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET
......
...@@ -6,11 +6,7 @@ ...@@ -6,11 +6,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/string.h> #include <linux/string.h>
#undef memset
extern void *memset (void *, int, size_t);
EXPORT_SYMBOL_NOVERS(memset); /* gcc generates direct calls to memset()... */ EXPORT_SYMBOL_NOVERS(memset); /* gcc generates direct calls to memset()... */
EXPORT_SYMBOL_NOVERS(__memset_generic);
EXPORT_SYMBOL_NOVERS(__bzero);
EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(memchr);
EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL_NOVERS(memcpy); EXPORT_SYMBOL_NOVERS(memcpy);
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
* iosapic_set_affinity(), initializations for * iosapic_set_affinity(), initializations for
* /proc/irq/#/smp_affinity * /proc/irq/#/smp_affinity
* 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
* 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
*/ */
/* /*
* Here is what the interrupt logic between a PCI device and the CPU looks like: * Here is what the interrupt logic between a PCI device and the CPU looks like:
...@@ -70,7 +71,7 @@ ...@@ -70,7 +71,7 @@
#undef DEBUG_IRQ_ROUTING #undef DEBUG_IRQ_ROUTING
#undef OVERRIDE_DEBUG #undef OVERRIDE_DEBUG
static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED; static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED;
...@@ -676,6 +677,11 @@ iosapic_init_pci_irq (void) ...@@ -676,6 +677,11 @@ iosapic_init_pci_irq (void)
pci_irq.route[i].bus, pci_irq.route[i].pci_id>>16, pci_irq.route[i].pin, pci_irq.route[i].bus, pci_irq.route[i].pci_id>>16, pci_irq.route[i].pin,
iosapic_irq[vector].base_irq + iosapic_irq[vector].pin, vector); iosapic_irq[vector].base_irq + iosapic_irq[vector].pin, vector);
#endif #endif
/*
* Forget not to program the IOSAPIC RTE per ACPI _PRT
*/
set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
} }
} }
......
...@@ -330,12 +330,15 @@ ENTRY(alt_dtlb_miss) ...@@ -330,12 +330,15 @@ ENTRY(alt_dtlb_miss)
(p8) br.cond.dptk dtlb_fault (p8) br.cond.dptk dtlb_fault
#endif #endif
extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
and r19=r19,r16 // clear ed, reserved bits, and PTE control bits shr.u r18=r16,57 // move address bit 61 to bit 4
shr.u r18=r16,57 // move address bit 61 to bit 4 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
;; ;;
andcm r18=0x10,r18 // bit 4=~address-bit(61) andcm r18=0x10,r18 // bit 4=~address-bit(61)
cmp.ne p8,p0=r0,r23 cmp.ne p8,p0=r0,r23
(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
(p8) br.cond.spnt page_fault (p8) br.cond.spnt page_fault
dep r21=-1,r21,IA64_PSR_ED_BIT,1 dep r21=-1,r21,IA64_PSR_ED_BIT,1
......
...@@ -395,7 +395,7 @@ show_cpuinfo (struct seq_file *m, void *v) ...@@ -395,7 +395,7 @@ show_cpuinfo (struct seq_file *m, void *v)
switch (c->family) { switch (c->family) {
case 0x07: memcpy(family, "Itanium", 8); break; case 0x07: memcpy(family, "Itanium", 8); break;
case 0x1f: memcpy(family, "McKinley", 9); break; case 0x1f: memcpy(family, "Itanium 2", 9); break;
default: sprintf(family, "%u", c->family); break; default: sprintf(family, "%u", c->family); break;
} }
......
...@@ -559,7 +559,7 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) ...@@ -559,7 +559,7 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
continue; continue;
switch (signr) { switch (signr) {
case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG:
continue; continue;
case SIGTSTP: case SIGTTIN: case SIGTTOU: case SIGTSTP: case SIGTTIN: case SIGTTOU:
......
...@@ -186,6 +186,10 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) ...@@ -186,6 +186,10 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
sig = SIGSEGV; code = __SEGV_PSTKOVF; sig = SIGSEGV; code = __SEGV_PSTKOVF;
break; break;
case 0x3f000 ... 0x3ffff: /* bundle-update in progress */
sig = SIGILL; code = __ILL_BNDMOD;
break;
default: default:
if (break_num < 0x40000 || break_num > 0x100000) if (break_num < 0x40000 || break_num > 0x100000)
die_if_kernel("Bad break", regs, break_num); die_if_kernel("Bad break", regs, break_num);
...@@ -443,30 +447,14 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, ...@@ -443,30 +447,14 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
"Unknown fault 13", "Unknown fault 14", "Unknown fault 15" "Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
}; };
#if 0 if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
/* this is for minimal trust debugging; yeah this kind of stuff is useful at times... */ /*
* This fault was due to lfetch.fault, set "ed" bit in the psr to cancel
if (vector != 25) { * the lfetch.
static unsigned long last_time; */
static char count; ia64_psr(regs)->ed = 1;
unsigned long n = vector; return;
char buf[32], *cp;
if (jiffies - last_time > 5*HZ)
count = 0;
if (count++ < 5) {
last_time = jiffies;
cp = buf + sizeof(buf);
*--cp = '\0';
while (n) {
*--cp = "0123456789abcdef"[n & 0xf];
n >>= 4;
}
printk("<0x%s>", cp);
}
} }
#endif
switch (vector) { switch (vector) {
case 24: /* General Exception */ case 24: /* General Exception */
......
...@@ -13,7 +13,7 @@ obj-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ ...@@ -13,7 +13,7 @@ obj-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
__divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \
checksum.o clear_page.o csum_partial_copy.o copy_page.o \ checksum.o clear_page.o csum_partial_copy.o copy_page.o \
copy_user.o clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ copy_user.o clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \
flush.o io.o do_csum.o \ flush.o io.o ip_fast_csum.o do_csum.o \
memcpy.o memset.o strlen.o swiotlb.o memcpy.o memset.o strlen.o swiotlb.o
obj-$(CONFIG_ITANIUM) += copy_page.o obj-$(CONFIG_ITANIUM) += copy_page.o
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include <asm/byteorder.h> #include <asm/byteorder.h>
static inline unsigned short static inline unsigned short
from64to16(unsigned long x) from64to16 (unsigned long x)
{ {
/* add up 32-bit words for 33 bits */ /* add up 32-bit words for 33 bits */
x = (x & 0xffffffff) + (x >> 32); x = (x & 0xffffffff) + (x >> 32);
...@@ -32,22 +32,17 @@ from64to16(unsigned long x) ...@@ -32,22 +32,17 @@ from64to16(unsigned long x)
* computes the checksum of the TCP/UDP pseudo-header * computes the checksum of the TCP/UDP pseudo-header
* returns a 16-bit checksum, already complemented. * returns a 16-bit checksum, already complemented.
*/ */
unsigned short int csum_tcpudp_magic(unsigned long saddr, unsigned short int
unsigned long daddr, csum_tcpudp_magic (unsigned long saddr, unsigned long daddr, unsigned short len,
unsigned short len, unsigned short proto, unsigned int sum)
unsigned short proto,
unsigned int sum)
{ {
return ~from64to16(saddr + daddr + sum + return ~from64to16(saddr + daddr + sum + ((unsigned long) ntohs(len) << 16) +
((unsigned long) ntohs(len) << 16) + ((unsigned long) proto << 8));
((unsigned long) proto << 8));
} }
unsigned int csum_tcpudp_nofold(unsigned long saddr, unsigned int
unsigned long daddr, csum_tcpudp_nofold (unsigned long saddr, unsigned long daddr, unsigned short len,
unsigned short len, unsigned short proto, unsigned int sum)
unsigned short proto,
unsigned int sum)
{ {
unsigned long result; unsigned long result;
...@@ -65,15 +60,6 @@ unsigned int csum_tcpudp_nofold(unsigned long saddr, ...@@ -65,15 +60,6 @@ unsigned int csum_tcpudp_nofold(unsigned long saddr,
extern unsigned long do_csum (const unsigned char *, long); extern unsigned long do_csum (const unsigned char *, long);
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries.
*/
unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl)
{
return ~do_csum(iph, ihl*4);
}
/* /*
* computes the checksum of a memory block at buff, length len, * computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit) * and adds in "sum" (32-bit)
...@@ -86,7 +72,8 @@ unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl) ...@@ -86,7 +72,8 @@ unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl)
* *
* it's best to have buff aligned on a 32-bit boundary * it's best to have buff aligned on a 32-bit boundary
*/ */
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) unsigned int
csum_partial (const unsigned char * buff, int len, unsigned int sum)
{ {
unsigned long result = do_csum(buff, len); unsigned long result = do_csum(buff, len);
...@@ -102,7 +89,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) ...@@ -102,7 +89,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
* this routine is used for miscellaneous IP-like checksums, mainly * this routine is used for miscellaneous IP-like checksums, mainly
* in icmp.c * in icmp.c
*/ */
unsigned short ip_compute_csum(unsigned char * buff, int len) unsigned short
ip_compute_csum (unsigned char * buff, int len)
{ {
return ~do_csum(buff,len); return ~do_csum(buff,len);
} }
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#define tgt2 r23 #define tgt2 r23
#define srcf r24 #define srcf r24
#define tgtf r25 #define tgtf r25
#define tgt_last r26
#define Nrot ((8*PIPE_DEPTH+7)&~7) #define Nrot ((8*PIPE_DEPTH+7)&~7)
...@@ -55,18 +56,21 @@ GLOBAL_ENTRY(copy_page) ...@@ -55,18 +56,21 @@ GLOBAL_ENTRY(copy_page)
mov src1=in1 mov src1=in1
adds src2=8,in1 adds src2=8,in1
mov tgt_last = PAGE_SIZE
;; ;;
adds tgt2=8,in0 adds tgt2=8,in0
add srcf=512,in1 add srcf=512,in1
mov ar.lc=lcount mov ar.lc=lcount
mov tgt1=in0 mov tgt1=in0
add tgtf=512,in0 add tgtf=512,in0
add tgt_last = tgt_last, in0
;; ;;
1: 1:
(p[0]) ld8 t1[0]=[src1],16 (p[0]) ld8 t1[0]=[src1],16
(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 (EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16
(p[0]) ld8 t2[0]=[src2],16 (p[0]) ld8 t2[0]=[src2],16
(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 (EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16
cmp.ltu p6,p0 = tgtf, tgt_last
;; ;;
(p[0]) ld8 t3[0]=[src1],16 (p[0]) ld8 t3[0]=[src1],16
(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16 (EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16
...@@ -83,8 +87,8 @@ GLOBAL_ENTRY(copy_page) ...@@ -83,8 +87,8 @@ GLOBAL_ENTRY(copy_page)
(p[0]) ld8 t8[0]=[src2],16 (p[0]) ld8 t8[0]=[src2],16
(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16 (EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16
lfetch [srcf], 64 (p6) lfetch [srcf], 64
lfetch [tgtf], 64 (p6) lfetch [tgtf], 64
br.ctop.sptk.few 1b br.ctop.sptk.few 1b
;; ;;
mov pr=saved_pr,0xffffffffffff0000 // restore predicates mov pr=saved_pr,0xffffffffffff0000 // restore predicates
......
...@@ -11,6 +11,9 @@ ...@@ -11,6 +11,9 @@
* Copyright (C) 1999, 2001-2002 Hewlett-Packard Co * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com> * Stephane Eranian <eranian@hpl.hp.com>
* *
* 02/04/22 Ken Chen <kenneth.w.chen@intel.com>
* Data locality study on the checksum buffer.
* More optimization cleanup - remove excessive stop bits.
* 02/04/08 David Mosberger <davidm@hpl.hp.com> * 02/04/08 David Mosberger <davidm@hpl.hp.com>
* More cleanup and tuning. * More cleanup and tuning.
* 01/04/18 Jun Nakajima <jun.nakajima@intel.com> * 01/04/18 Jun Nakajima <jun.nakajima@intel.com>
...@@ -80,6 +83,12 @@ ...@@ -80,6 +83,12 @@
// type of packet or alignment we get. Like the ip_fast_csum() routine // type of packet or alignment we get. Like the ip_fast_csum() routine
// where we know we have at least 20bytes worth of data to checksum. // where we know we have at least 20bytes worth of data to checksum.
// - Do a better job of handling small packets. // - Do a better job of handling small packets.
// - Note on prefetching: it was found that under various load, i.e. ftp read/write,
// nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8%
// on the data that buffer points to (partly because the checksum is often preceded by
// a copy_from_user()). This finding indiate that lfetch will not be beneficial since
// the data is already in the cache.
//
#define saved_pfs r11 #define saved_pfs r11
#define hmask r16 #define hmask r16
...@@ -117,7 +126,7 @@ ...@@ -117,7 +126,7 @@
GLOBAL_ENTRY(do_csum) GLOBAL_ENTRY(do_csum)
.prologue .prologue
.save ar.pfs, saved_pfs .save ar.pfs, saved_pfs
alloc saved_pfs=ar.pfs,2,16,1,16 alloc saved_pfs=ar.pfs,2,16,0,16
.rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2] .rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2]
.rotp p[PIPE_DEPTH], pC1[2], pC2[2] .rotp p[PIPE_DEPTH], pC1[2], pC2[2]
mov ret0=r0 // in case we have zero length mov ret0=r0 // in case we have zero length
...@@ -197,22 +206,21 @@ GLOBAL_ENTRY(do_csum) ...@@ -197,22 +206,21 @@ GLOBAL_ENTRY(do_csum)
// Calculate the checksum loading two 8-byte words per loop. // Calculate the checksum loading two 8-byte words per loop.
// //
.do_csum16: .do_csum16:
mov saved_lc=ar.lc
shr.u count=count,1 // we do 16 bytes per loop shr.u count=count,1 // we do 16 bytes per loop
brp.loop.imp 1f,2f
;; ;;
cmp.eq p9,p10=r0,count // if (count == 0) cmp.eq p9,p10=r0,count // if (count == 0)
adds count=-1,count adds count=-1,count
brp.loop.imp 1f,2f
;;
mov ar.ec=PIPE_DEPTH mov ar.ec=PIPE_DEPTH
mov ar.lc=count // set lc
// result1[0] must be initialized in advance.
mov result2[0]=r0
mov pr.rot=1<<16
mov carry1=r0 mov carry1=r0
mov carry2=r0 mov carry2=r0
add first2=8,first1 add first2=8,first1
;;
mov ar.lc=count // set lc
mov pr.rot=1<<16
// result1[0] must be initialized in advance.
mov result2[0]=r0
(p9) br.cond.sptk .do_csum_exit (p9) br.cond.sptk .do_csum_exit
;; ;;
.align 32 .align 32
...@@ -223,7 +231,7 @@ GLOBAL_ENTRY(do_csum) ...@@ -223,7 +231,7 @@ GLOBAL_ENTRY(do_csum)
(pC2[1])adds carry2=1,carry2 (pC2[1])adds carry2=1,carry2
(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY] (ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY]
(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY] (ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY]
[2:] 2:
(p[0]) ld8 word1[0]=[first1],16 (p[0]) ld8 word1[0]=[first1],16
(p[0]) ld8 word2[0]=[first2],16 (p[0]) ld8 word2[0]=[first2],16
br.ctop.sptk 1b br.ctop.sptk 1b
......
/*
* Optmized version of the ip_fast_csum() function
* Used for calculating IP header checksum
*
* Return: 16bit checksum, complemented
*
* Inputs:
* in0: address of buffer to checksum (char *)
* in1: length of the buffer (int)
*
* Copyright (C) 2002 Intel Corp.
* Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
*/
#include <asm/asmmacro.h>
/*
* Since we know that most likely this function is called with buf aligned
* on 4-byte boundary and 20 bytes in length, we can execution rather quickly
* versus calling generic version of do_csum, which has lots of overhead in
* handling various alignments and sizes. However, due to lack of constrains
* put on the function input argument, cases with alignment not on 4-byte or
* size not equal to 20 bytes will be handled by the generic do_csum function.
*/
#define in0 r32
#define in1 r33
#define ret0 r8
GLOBAL_ENTRY(ip_fast_csum)
.prologue
.body
cmp.ne p6,p7=5,in1 // size other than 20 byte?
and r14=3,in0 // is it aligned on 4-byte?
add r15=4,in0 // second source pointer
;;
cmp.ne.or.andcm p6,p7=r14,r0
;;
(p7) ld4 r20=[in0],8
(p7) ld4 r21=[r15],8
(p6) br.spnt .generic
;;
ld4 r22=[in0],8
ld4 r23=[r15],8
;;
ld4 r24=[in0]
add r20=r20,r21
add r22=r22,r23
;;
add r20=r20,r22
;;
add r20=r20,r24
;;
shr.u ret0=r20,16 // now need to add the carry
zxt2 r20=r20
;;
add r20=ret0,r20
;;
shr.u ret0=r20,16 // add carry again
zxt2 r20=r20
;;
add r20=ret0,r20
;;
shr.u ret0=r20,16
zxt2 r20=r20
;;
add r20=ret0,r20
;;
andcm ret0=-1,r20
.restore sp // reset frame state
br.ret.sptk.many b0
;;
.generic:
.prologue
.save ar.pfs, r35
alloc r35=ar.pfs,2,2,2,0
.save rp, r34
mov r34=b0
.body
dep.z out1=in1,2,30
mov out0=in0
;;
br.call.sptk.many b0=do_csum
;;
andcm ret0=-1,ret0
mov ar.pfs=r35
mov b0=r34
br.ret.sptk.many b0
END(ip_fast_csum)
/* /* Optimized version of the standard memset() function.
*
* Optimized version of the standard memset() function Copyright (c) 2002 Hewlett-Packard Co/CERN
* Sverre Jarp <Sverre.Jarp@cern.ch>
* Return: none
* Return: dest
* Inputs:
* in0: address of buffer Inputs:
* in1: byte value to use for storing in0: dest
* in2: length of the buffer in1: value
* in2: count
* Copyright (C) 1999, 2001, 2002 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com> The algorithm is fairly straightforward: set byte by byte until we
*/ we get to a 16B-aligned address, then loop on 128 B chunks using an
early store as prefetching, then loop on 32B chucks, then clear remaining
words, finally clear remaining bytes.
Since a stf.spill f0 can store 16B in one go, we use this instruction
to get peak speed when value = 0. */
#include <asm/asmmacro.h> #include <asm/asmmacro.h>
#undef ret
#define dest in0
#define value in1
#define cnt in2
// arguments #define tmp r31
// #define save_lc r30
#define buf r32 #define ptr0 r29
#define val r33 #define ptr1 r28
#define len r34 #define ptr2 r27
#define ptr3 r26
// #define ptr9 r24
// local registers #define loopcnt r23
// #define linecnt r22
#define saved_pfs r14 #define bytecnt r21
#define cnt r18
#define buf2 r19 #define fvalue f6
#define saved_lc r20
#define tmp r21 // This routine uses only scratch predicate registers (p6 - p15)
#define p_scr p6 // default register for same-cycle branches
GLOBAL_ENTRY(__bzero) #define p_nz p7
#define p_zr p8
#define p_unalgn p9
#define p_y p11
#define p_n p12
#define p_yy p13
#define p_nn p14
#define MIN1 15
#define MIN1P1HALF 8
#define LINE_SIZE 128
#define LSIZE_SH 7 // shift amount
#define PREF_AHEAD 8
GLOBAL_ENTRY(memset)
{ .mmi
.prologue .prologue
.save ar.pfs, saved_pfs alloc tmp = ar.pfs, 3, 0, 0, 0
alloc saved_pfs=ar.pfs,0,0,3,0 .body
mov out2=out1 lfetch.nt1 [dest] //
mov out1=0 .save ar.lc, save_lc
/* FALL THROUGH (explicit NOPs so that next alloc is preceded by stop bit!) */ mov.i save_lc = ar.lc
} { .mmi
mov ret0 = dest // return value
cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero
cmp.eq p_scr, p0 = cnt, r0
;; }
{ .mmi
and ptr2 = -(MIN1+1), dest // aligned address
and tmp = MIN1, dest // prepare to check for correct alignment
tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U)
} { .mib
mov ptr1 = dest
mux1 value = value, @brcst // create 8 identical bytes in word
(p_scr) br.ret.dpnt.many rp // return immediately if count = 0
;; }
{ .mib
cmp.ne p_unalgn, p0 = tmp, r0 //
} { .mib
sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt
cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task?
(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U)
;; }
{ .mmi
(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment
(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment
(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ?
;; }
{ .mib
(p_y) add cnt = -8, cnt //
(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ?
} { .mib
(p_y) st8 [ptr2] = value,-4 //
(p_n) add ptr2 = 4, ptr2 //
;; }
{ .mib
(p_yy) add cnt = -4, cnt //
(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ?
} { .mib
(p_yy) st4 [ptr2] = value,-2 //
(p_nn) add ptr2 = 2, ptr2 //
;; }
{ .mmi
mov tmp = LINE_SIZE+1 // for compare
(p_y) add cnt = -2, cnt //
(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ?
} { .mmi
setf.sig fvalue=value // transfer value to FLP side
(p_y) st2 [ptr2] = value,-1 //
(p_n) add ptr2 = 1, ptr2 //
;; }
{ .mmi
(p_yy) st1 [ptr2] = value //
cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task?
} { .mbb
(p_yy) add cnt = -1, cnt //
(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few
;; }
{ .mib
nop.m 0 nop.m 0
nop.f 0 shr.u linecnt = cnt, LSIZE_SH
nop.i 0 (p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill
;; ;; }
END(__bzero)
GLOBAL_ENTRY(__memset_generic)
.prologue
.save ar.pfs, saved_pfs
alloc saved_pfs=ar.pfs,3,0,0,0 // cnt is sink here
cmp.eq p8,p0=r0,len // check for zero length
.save ar.lc, saved_lc
mov saved_lc=ar.lc // preserve ar.lc (slow)
;;
.body .align 32 // -------------------------- // L1A: store ahead into cache lines; fill later
{ .mmi
and tmp = -(LINE_SIZE), cnt // compute end of range
mov ptr9 = ptr1 // used for prefetching
and cnt = (LINE_SIZE-1), cnt // remainder
} { .mmi
mov loopcnt = PREF_AHEAD-1 // default prefetch loop
cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
;; }
{ .mmi
(p_scr) add loopcnt = -1, linecnt //
add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores)
add ptr1 = tmp, ptr1 // first address beyond total range
;; }
{ .mmi
add tmp = -1, linecnt // next loop count
mov.i ar.lc = loopcnt //
;; }
.pref_l1a:
{ .mib
stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart
nop.i 0
br.cloop.dptk.few .pref_l1a
;; }
{ .mmi
add ptr0 = 16, ptr2 // Two stores in parallel
mov.i ar.lc = tmp //
;; }
.l1ax:
{ .mmi
stf8 [ptr2] = fvalue, 8
stf8 [ptr0] = fvalue, 8
;; }
{ .mmi
stf8 [ptr2] = fvalue, 24
stf8 [ptr0] = fvalue, 24
;; }
{ .mmi
stf8 [ptr2] = fvalue, 8
stf8 [ptr0] = fvalue, 8
;; }
{ .mmi
stf8 [ptr2] = fvalue, 24
stf8 [ptr0] = fvalue, 24
;; }
{ .mmi
stf8 [ptr2] = fvalue, 8
stf8 [ptr0] = fvalue, 8
;; }
{ .mmi
stf8 [ptr2] = fvalue, 24
stf8 [ptr0] = fvalue, 24
;; }
{ .mmi
stf8 [ptr2] = fvalue, 8
stf8 [ptr0] = fvalue, 32
cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
;; }
{ .mmb
stf8 [ptr2] = fvalue, 24
(p_scr) stf8 [ptr9] = fvalue, 128
br.cloop.dptk.few .l1ax
;; }
{ .mbb
cmp.le p_scr, p0 = 8, cnt // just a few bytes left ?
(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2
br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3
;; }
.align 32
.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later
{ .mmi
and tmp = -(LINE_SIZE), cnt // compute end of range
mov ptr9 = ptr1 // used for prefetching
and cnt = (LINE_SIZE-1), cnt // remainder
} { .mmi
mov loopcnt = PREF_AHEAD-1 // default prefetch loop
cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
;; }
{ .mmi
(p_scr) add loopcnt = -1, linecnt
add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores)
add ptr1 = tmp, ptr1 // first address beyond total range
;; }
{ .mmi
add tmp = -1, linecnt // next loop count
mov.i ar.lc = loopcnt
;; }
.pref_l1b:
{ .mib
stf.spill [ptr9] = f0, 128 // Do stores one cache line apart
nop.i 0
br.cloop.dptk.few .pref_l1b
;; }
{ .mmi
add ptr0 = 16, ptr2 // Two stores in parallel
mov.i ar.lc = tmp
;; }
.l1bx:
{ .mmi
stf.spill [ptr2] = f0, 32
stf.spill [ptr0] = f0, 32
;; }
{ .mmi
stf.spill [ptr2] = f0, 32
stf.spill [ptr0] = f0, 32
;; }
{ .mmi
stf.spill [ptr2] = f0, 32
stf.spill [ptr0] = f0, 64
cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
;; }
{ .mmb
stf.spill [ptr2] = f0, 32
(p_scr) stf.spill [ptr9] = f0, 128
br.cloop.dptk.few .l1bx
;; }
{ .mib
cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
(p_scr) br.cond.dpnt.many .move_bytes_from_alignment //
;; }
adds tmp=-1,len // br.ctop is repeat/until .fraction_of_line:
tbit.nz p6,p0=buf,0 // odd alignment { .mib
(p8) br.ret.spnt.many rp add ptr2 = 16, ptr1
shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32
cmp.lt p7,p0=16,len // if len > 16 then long memset ;; }
mux1 val=val,@brcst // prepare value { .mib
(p7) br.cond.dptk .long_memset cmp.eq p_scr, p0 = loopcnt, r0
;; add loopcnt = -1, loopcnt
mov ar.lc=tmp // initialize lc for small count (p_scr) br.cond.dpnt.many .store_words
;; // avoid RAW and WAW on ar.lc ;; }
1: // worst case 15 cyles, avg 8 cycles { .mib
st1 [buf]=val,1 and cnt = 0x1f, cnt // compute the remaining cnt
br.cloop.dptk.few 1b mov.i ar.lc = loopcnt
;; // avoid RAW on ar.lc ;; }
mov ar.lc=saved_lc .align 32
mov ar.pfs=saved_pfs .l2: // ------------------------------------ // L2A: store 32B in 2 cycles
br.ret.sptk.many rp // end of short memset { .mmb
stf8 [ptr1] = fvalue, 8
// at this point we know we have more than 16 bytes to copy stf8 [ptr2] = fvalue, 8
// so we focus on alignment ;; } { .mmb
.long_memset: stf8 [ptr1] = fvalue, 24
(p6) st1 [buf]=val,1 // 1-byte aligned stf8 [ptr2] = fvalue, 24
(p6) adds len=-1,len;; // sync because buf is modified br.cloop.dptk.many .l2
tbit.nz p6,p0=buf,1 ;; }
;; .store_words:
(p6) st2 [buf]=val,2 // 2-byte aligned { .mib
(p6) adds len=-2,len;; cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
tbit.nz p6,p0=buf,2 (p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch
;; ;; }
(p6) st4 [buf]=val,4 // 4-byte aligned
(p6) adds len=-4,len;; { .mmi
tbit.nz p6,p0=buf,3 stf8 [ptr1] = fvalue, 8 // store
;; cmp.le p_y, p_n = 16, cnt
(p6) st8 [buf]=val,8 // 8-byte aligned add cnt = -8, cnt // subtract
(p6) adds len=-8,len;; ;; }
shr.u cnt=len,4 // number of 128-bit (2x64bit) words { .mmi
;; (p_y) stf8 [ptr1] = fvalue, 8 // store
cmp.eq p6,p0=r0,cnt (p_y) cmp.le.unc p_yy, p_nn = 16, cnt
adds tmp=-1,cnt (p_y) add cnt = -8, cnt // subtract
(p6) br.cond.dpnt .dotail // we have less than 16 bytes left ;; }
;; { .mmi // store
adds buf2=8,buf // setup second base pointer (p_yy) stf8 [ptr1] = fvalue, 8
mov ar.lc=tmp (p_yy) add cnt = -8, cnt // subtract
;; ;; }
2: // 16bytes/iteration
st8 [buf]=val,16 .move_bytes_from_alignment:
st8 [buf2]=val,16 { .mib
br.cloop.dptk.few 2b cmp.eq p_scr, p0 = cnt, r0
;; tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ?
.dotail: // tail correction based on len only (p_scr) br.cond.dpnt.few .restore_and_exit
tbit.nz p6,p0=len,3 ;; }
;; { .mib
(p6) st8 [buf]=val,8 // at least 8 bytes (p_y) st4 [ptr1] = value,4
tbit.nz p6,p0=len,2 tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ?
;; ;; }
(p6) st4 [buf]=val,4 // at least 4 bytes { .mib
tbit.nz p6,p0=len,1 (p_yy) st2 [ptr1] = value,2
;; tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ?
(p6) st2 [buf]=val,2 // at least 2 bytes ;; }
tbit.nz p6,p0=len,0
mov ar.lc=saved_lc { .mib
;; (p_y) st1 [ptr1] = value
(p6) st1 [buf]=val // only 1 byte left ;; }
.restore_and_exit:
{ .mib
nop.m 0
mov.i ar.lc = save_lc
br.ret.sptk.many rp br.ret.sptk.many rp
END(__memset_generic) ;; }
.global memset .move_bytes_unaligned:
memset = __memset_generic // alias needed for gcc { .mmi
.pred.rel "mutex",p_y, p_n
.pred.rel "mutex",p_yy, p_nn
(p_n) cmp.le p_yy, p_nn = 4, cnt
(p_y) cmp.le p_yy, p_nn = 5, cnt
(p_n) add ptr2 = 2, ptr1
} { .mmi
(p_y) add ptr2 = 3, ptr1
(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left]
(p_y) add cnt = -1, cnt
;; }
{ .mmi
(p_yy) cmp.le.unc p_y, p0 = 8, cnt
add ptr3 = ptr1, cnt // prepare last store
mov.i ar.lc = save_lc
} { .mmi
(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (o less) left]
(p_yy) add cnt = -4, cnt
;; }
{ .mmi
(p_y) cmp.le.unc p_yy, p0 = 8, cnt
add ptr3 = -1, ptr3 // last store
tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ?
} { .mmi
(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left]
(p_y) add cnt = -4, cnt
;; }
{ .mmi
(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left]
tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ?
} { .mmi
(p_yy) add cnt = -4, cnt
;; }
{ .mmb
(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes
(p_y) st1 [ptr3] = value // fill last byte (using ptr3)
br.ret.sptk.many rp
}
END(memset)
...@@ -137,10 +137,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re ...@@ -137,10 +137,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
bad_area: bad_area:
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
if (isr & IA64_ISR_SP) { if ((isr & IA64_ISR_SP)
|| ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
{
/* /*
* This fault was due to a speculative load set the "ed" bit in the psr to * This fault was due to a speculative load or lfetch.fault, set the "ed"
* ensure forward progress (target register will get a NaT). * bit in the psr to ensure forward progress. (Target register will get a
* NaT for ld.s, lfetch will be canceled.)
*/ */
ia64_psr(regs)->ed = 1; ia64_psr(regs)->ed = 1;
return; return;
......
...@@ -143,6 +143,7 @@ tab[] = ...@@ -143,6 +143,7 @@ tab[] =
{ "IA64_SWITCH_STACK_AR_RNAT_OFFSET", offsetof (struct switch_stack, ar_rnat) }, { "IA64_SWITCH_STACK_AR_RNAT_OFFSET", offsetof (struct switch_stack, ar_rnat) },
{ "IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET", offsetof (struct switch_stack, ar_bspstore) }, { "IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET", offsetof (struct switch_stack, ar_bspstore) },
{ "IA64_SWITCH_STACK_PR_OFFSET", offsetof (struct switch_stack, pr) }, { "IA64_SWITCH_STACK_PR_OFFSET", offsetof (struct switch_stack, pr) },
{ "IA64_SIGCONTEXT_IP_OFFSET", offsetof (struct sigcontext, sc_ip) },
{ "IA64_SIGCONTEXT_AR_BSP_OFFSET", offsetof (struct sigcontext, sc_ar_bsp) }, { "IA64_SIGCONTEXT_AR_BSP_OFFSET", offsetof (struct sigcontext, sc_ar_bsp) },
{ "IA64_SIGCONTEXT_AR_FPSR_OFFSET", offsetof (struct sigcontext, sc_ar_fpsr) }, { "IA64_SIGCONTEXT_AR_FPSR_OFFSET", offsetof (struct sigcontext, sc_ar_fpsr) },
{ "IA64_SIGCONTEXT_AR_RNAT_OFFSET", offsetof (struct sigcontext, sc_ar_rnat) }, { "IA64_SIGCONTEXT_AR_RNAT_OFFSET", offsetof (struct sigcontext, sc_ar_rnat) },
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#define __acpi_map_table(phys_addr, size) __va(phys_addr) #define __acpi_map_table(phys_addr, size) __va(phys_addr)
const char *acpi_get_sysname (void);
int acpi_boot_init (char *cdline); int acpi_boot_init (char *cdline);
int acpi_find_rsdp (unsigned long *phys_addr); int acpi_find_rsdp (unsigned long *phys_addr);
int acpi_request_vector (u32 int_type); int acpi_request_vector (u32 int_type);
......
...@@ -4,8 +4,8 @@ ...@@ -4,8 +4,8 @@
/* /*
* This is derived from the Linux/x86 version. * This is derived from the Linux/x86 version.
* *
* Copyright (C) 1998, 1999 Hewlett-Packard Co * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
* Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com>
*/ */
#define EPERM 1 /* Operation not permitted */ #define EPERM 1 /* Operation not permitted */
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#define EINTR 4 /* Interrupted system call */ #define EINTR 4 /* Interrupted system call */
#define EIO 5 /* I/O error */ #define EIO 5 /* I/O error */
#define ENXIO 6 /* No such device or address */ #define ENXIO 6 /* No such device or address */
#define E2BIG 7 /* Arg list too long */ #define E2BIG 7 /* Argument list too long */
#define ENOEXEC 8 /* Exec format error */ #define ENOEXEC 8 /* Exec format error */
#define EBADF 9 /* Bad file number */ #define EBADF 9 /* Bad file number */
#define ECHILD 10 /* No child processes */ #define ECHILD 10 /* No child processes */
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#define _ASM_IA64_IOCTLS_H #define _ASM_IA64_IOCTLS_H
/* /*
* Copyright (C) 1998, 1999 Hewlett-Packard Co * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
* Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com>
*/ */
#include <asm/ioctl.h> #include <asm/ioctl.h>
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
/* 0x54 is just a magic number to make these relatively unique ('T') */ /* 0x54 is just a magic number to make these relatively unique ('T') */
#define TCGETS 0x5401 #define TCGETS 0x5401
#define TCSETS 0x5402 #define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */
#define TCSETSW 0x5403 #define TCSETSW 0x5403
#define TCSETSF 0x5404 #define TCSETSF 0x5404
#define TCGETA 0x5405 #define TCGETA 0x5405
......
...@@ -71,7 +71,7 @@ do { \ ...@@ -71,7 +71,7 @@ do { \
#ifdef CONFIG_IA64_GENERIC #ifdef CONFIG_IA64_GENERIC
# include <asm/machvec.h> # include <asm/machvec.h>
# define virt_to_page(kaddr) (mem_map + platform_map_nr(kaddr)) # define virt_to_page(kaddr) (mem_map + platform_map_nr(kaddr))
# define page_to_phys(page) XXX fix me # define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT)
#elif defined (CONFIG_IA64_SGI_SN1) #elif defined (CONFIG_IA64_SGI_SN1)
# ifndef CONFIG_DISCONTIGMEM # ifndef CONFIG_DISCONTIGMEM
# define virt_to_page(kaddr) (mem_map + MAP_NR_DENSE(kaddr)) # define virt_to_page(kaddr) (mem_map + MAP_NR_DENSE(kaddr))
......
...@@ -154,6 +154,7 @@ ...@@ -154,6 +154,7 @@
#define IA64_ISR_SP_BIT 36 /* speculative load exception */ #define IA64_ISR_SP_BIT 36 /* speculative load exception */
#define IA64_ISR_RS_BIT 37 /* mandatory register-stack exception */ #define IA64_ISR_RS_BIT 37 /* mandatory register-stack exception */
#define IA64_ISR_IR_BIT 38 /* invalid register frame exception */ #define IA64_ISR_IR_BIT 38 /* invalid register frame exception */
#define IA64_ISR_CODE_MASK 0xf
#define IA64_ISR_X (__IA64_UL(1) << IA64_ISR_X_BIT) #define IA64_ISR_X (__IA64_UL(1) << IA64_ISR_X_BIT)
#define IA64_ISR_W (__IA64_UL(1) << IA64_ISR_W_BIT) #define IA64_ISR_W (__IA64_UL(1) << IA64_ISR_W_BIT)
...@@ -163,6 +164,14 @@ ...@@ -163,6 +164,14 @@
#define IA64_ISR_RS (__IA64_UL(1) << IA64_ISR_RS_BIT) #define IA64_ISR_RS (__IA64_UL(1) << IA64_ISR_RS_BIT)
#define IA64_ISR_IR (__IA64_UL(1) << IA64_ISR_IR_BIT) #define IA64_ISR_IR (__IA64_UL(1) << IA64_ISR_IR_BIT)
/* ISR code field for non-access instructions */
#define IA64_ISR_CODE_TPA 0
#define IA64_ISR_CODE_FC 1
#define IA64_ISR_CODE_PROBE 2
#define IA64_ISR_CODE_TAK 3
#define IA64_ISR_CODE_LFETCH 4
#define IA64_ISR_CODE_PROBEF 5
#define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */ #define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */
#define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */ #define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */
#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ #define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */
......
...@@ -154,7 +154,8 @@ typedef struct siginfo { ...@@ -154,7 +154,8 @@ typedef struct siginfo {
#define ILL_BADSTK (__SI_FAULT|8) /* internal stack error */ #define ILL_BADSTK (__SI_FAULT|8) /* internal stack error */
#define ILL_BADIADDR (__SI_FAULT|9) /* unimplemented instruction address */ #define ILL_BADIADDR (__SI_FAULT|9) /* unimplemented instruction address */
#define __ILL_BREAK (__SI_FAULT|10) /* illegal break */ #define __ILL_BREAK (__SI_FAULT|10) /* illegal break */
#define NSIGILL 10 #define __ILL_BNDMOD (__SI_FAULT|11) /* bundle-update (modification) in progress */
#define NSIGILL 11
/* /*
* SIGFPE si_codes * SIGFPE si_codes
......
/************************************************************************** /*
* * *
* Copyright (C) 2001 Silicon Graphics, Inc. All rights reserved. * * This file is subject to the terms and conditions of the GNU General Public
* * * License. See the file "COPYING" in the main directory of this archive
* These coded instructions, statements, and computer programs contain * * for more details.
* unpublished proprietary information of Silicon Graphics, Inc., and * *
* are protected by Federal copyright law. They may not be disclosed * * Copyright (c) 2001, 2002 Silicon Graphics, Inc. All rights reserved.
* to third parties or copied or duplicated in any form, in whole or * */
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
**************************************************************************/
#ifndef _SHUB_MD_H #ifndef _SHUB_MD_H
#define _SHUB_MD_H #define _SHUB_MD_H
......
...@@ -18,20 +18,6 @@ ...@@ -18,20 +18,6 @@
extern __kernel_size_t strlen (const char *); extern __kernel_size_t strlen (const char *);
extern void *memcpy (void *, const void *, __kernel_size_t); extern void *memcpy (void *, const void *, __kernel_size_t);
extern void *memset (void *, int, __kernel_size_t);
extern void *__memset_generic (void *, int, __kernel_size_t);
extern void __bzero (void *, __kernel_size_t);
#define memset(s, c, count) \
({ \
void *_s = (s); \
int _c = (c); \
__kernel_size_t _count = (count); \
\
if (__builtin_constant_p(_c) && _c == 0) \
__bzero(_s, _count); \
else \
__memset_generic(_s, _c, _count); \
})
#endif /* _ASM_IA64_STRING_H */ #endif /* _ASM_IA64_STRING_H */
...@@ -353,6 +353,9 @@ extern long __cmpxchg_called_with_bad_pointer(void); ...@@ -353,6 +353,9 @@ extern long __cmpxchg_called_with_bad_pointer(void);
# define IS_IA32_PROCESS(regs) (ia64_psr(regs)->is != 0) # define IS_IA32_PROCESS(regs) (ia64_psr(regs)->is != 0)
#else #else
# define IS_IA32_PROCESS(regs) 0 # define IS_IA32_PROCESS(regs) 0
struct task_struct;
static inline void ia32_save_state(struct task_struct *t __attribute__((unused))){}
static inline void ia32_load_state(struct task_struct *t __attribute__((unused))){}
#endif #endif
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment