Commit 58cf3522 authored by Ingo Molnar's avatar Ingo Molnar

Merge branches 'x86/mmio', 'x86/delay', 'x86/idle', 'x86/oprofile',...

Merge branches 'x86/mmio', 'x86/delay', 'x86/idle', 'x86/oprofile', 'x86/debug', 'x86/ptrace' and 'x86/amd-iommu' into x86/devel
...@@ -271,6 +271,17 @@ and is between 256 and 4096 characters. It is defined in the file ...@@ -271,6 +271,17 @@ and is between 256 and 4096 characters. It is defined in the file
aic79xx= [HW,SCSI] aic79xx= [HW,SCSI]
See Documentation/scsi/aic79xx.txt. See Documentation/scsi/aic79xx.txt.
amd_iommu= [HW,X86-84]
Pass parameters to the AMD IOMMU driver in the system.
Possible values are:
isolate - enable device isolation (each device, as far
as possible, will get its own protection
domain)
amd_iommu_size= [HW,X86-64]
Define the size of the aperture for the AMD IOMMU
driver. Possible values are:
'32M', '64M' (default), '128M', '256M', '512M', '1G'
amijoy.map= [HW,JOY] Amiga joystick support amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b> Format: <a>,<b>
......
...@@ -376,6 +376,12 @@ L: linux-geode@lists.infradead.org (moderated for non-subscribers) ...@@ -376,6 +376,12 @@ L: linux-geode@lists.infradead.org (moderated for non-subscribers)
W: http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html W: http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
S: Supported S: Supported
AMD IOMMU (AMD-VI)
P: Joerg Roedel
M: joerg.roedel@amd.com
L: iommu@lists.linux-foundation.org
S: Supported
AMS (Apple Motion Sensor) DRIVER AMS (Apple Motion Sensor) DRIVER
P: Stelian Pop P: Stelian Pop
M: stelian@popies.net M: stelian@popies.net
......
...@@ -533,6 +533,21 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT ...@@ -533,6 +533,21 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
Calgary anyway, pass 'iommu=calgary' on the kernel command line. Calgary anyway, pass 'iommu=calgary' on the kernel command line.
If unsure, say Y. If unsure, say Y.
config AMD_IOMMU
bool "AMD IOMMU support"
select SWIOTLB
depends on X86_64 && PCI && ACPI
help
With this option you can enable support for AMD IOMMU hardware in
your system. An IOMMU is a hardware component which provides
remapping of DMA memory accesses from devices. With an AMD IOMMU you
can isolate the the DMA memory of different devices and protect the
system from misbehaving device drivers or hardware.
You can find out if your system has an AMD IOMMU if you look into
your BIOS for an option to enable it or if you have an IVRS ACPI
table.
# need this always selected by IOMMU for the VIA workaround # need this always selected by IOMMU for the VIA workaround
config SWIOTLB config SWIOTLB
bool bool
......
...@@ -99,6 +99,7 @@ ifeq ($(CONFIG_X86_64),y) ...@@ -99,6 +99,7 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
......
/*
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <asm/proto.h>
#include <asm/gart.h>
#include <asm/amd_iommu_types.h>
#include <asm/amd_iommu.h>
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
#define to_pages(addr, size) \
(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
struct command {
u32 data[4];
};
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
static int iommu_has_npcache(struct amd_iommu *iommu)
{
return iommu->cap & IOMMU_CAP_NPCACHE;
}
static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
{
u32 tail, head;
u8 *target;
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
target = (iommu->cmd_buf + tail);
memcpy_toio(target, cmd, sizeof(*cmd));
tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
if (tail == head)
return -ENOMEM;
writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
return 0;
}
static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command(iommu, cmd);
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
static int iommu_completion_wait(struct amd_iommu *iommu)
{
int ret;
struct command cmd;
volatile u64 ready = 0;
unsigned long ready_phys = virt_to_phys(&ready);
memset(&cmd, 0, sizeof(cmd));
cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
cmd.data[1] = HIGH_U32(ready_phys);
cmd.data[2] = 1; /* value written to 'ready' */
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
iommu->need_sync = 0;
ret = iommu_queue_command(iommu, &cmd);
if (ret)
return ret;
while (!ready)
cpu_relax();
return 0;
}
static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
{
struct command cmd;
BUG_ON(iommu == NULL);
memset(&cmd, 0, sizeof(cmd));
CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
cmd.data[0] = devid;
iommu->need_sync = 1;
return iommu_queue_command(iommu, &cmd);
}
static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
u64 address, u16 domid, int pde, int s)
{
struct command cmd;
memset(&cmd, 0, sizeof(cmd));
address &= PAGE_MASK;
CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
cmd.data[1] |= domid;
cmd.data[2] = LOW_U32(address);
cmd.data[3] = HIGH_U32(address);
if (s)
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
if (pde)
cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
iommu->need_sync = 1;
return iommu_queue_command(iommu, &cmd);
}
static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
u64 address, size_t size)
{
int s = 0;
unsigned pages = to_pages(address, size);
address &= PAGE_MASK;
if (pages > 1) {
/*
* If we have to flush more than one page, flush all
* TLB entries for this domain
*/
address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
s = 1;
}
iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
return 0;
}
static int iommu_map(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
int prot)
{
u64 __pte, *pte, *page;
bus_addr = PAGE_ALIGN(bus_addr);
phys_addr = PAGE_ALIGN(bus_addr);
/* only support 512GB address spaces for now */
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
return -EINVAL;
pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
*pte = IOMMU_L2_PDE(virt_to_phys(page));
}
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
*pte = IOMMU_L1_PDE(virt_to_phys(page));
}
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
if (IOMMU_PTE_PRESENT(*pte))
return -EBUSY;
__pte = phys_addr | IOMMU_PTE_P;
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
if (prot & IOMMU_PROT_IW)
__pte |= IOMMU_PTE_IW;
*pte = __pte;
return 0;
}
static int iommu_for_unity_map(struct amd_iommu *iommu,
struct unity_map_entry *entry)
{
u16 bdf, i;
for (i = entry->devid_start; i <= entry->devid_end; ++i) {
bdf = amd_iommu_alias_table[i];
if (amd_iommu_rlookup_table[bdf] == iommu)
return 1;
}
return 0;
}
static int iommu_init_unity_mappings(struct amd_iommu *iommu)
{
struct unity_map_entry *entry;
int ret;
list_for_each_entry(entry, &amd_iommu_unity_map, list) {
if (!iommu_for_unity_map(iommu, entry))
continue;
ret = dma_ops_unity_map(iommu->default_dom, entry);
if (ret)
return ret;
}
return 0;
}
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e)
{
u64 addr;
int ret;
for (addr = e->address_start; addr < e->address_end;
addr += PAGE_SIZE) {
ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
if (ret)
return ret;
/*
* if unity mapping is in aperture range mark the page
* as allocated in the aperture
*/
if (addr < dma_dom->aperture_size)
__set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
}
return 0;
}
static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
u16 devid)
{
struct unity_map_entry *e;
int ret;
list_for_each_entry(e, &amd_iommu_unity_map, list) {
if (!(devid >= e->devid_start && devid <= e->devid_end))
continue;
ret = dma_ops_unity_map(dma_dom, e);
if (ret)
return ret;
}
return 0;
}
static unsigned long dma_mask_to_pages(unsigned long mask)
{
return (mask >> PAGE_SHIFT) +
(PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
}
static unsigned long dma_ops_alloc_addresses(struct device *dev,
struct dma_ops_domain *dom,
unsigned int pages)
{
unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
unsigned long address;
unsigned long size = dom->aperture_size >> PAGE_SHIFT;
unsigned long boundary_size;
boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
PAGE_SIZE) >> PAGE_SHIFT;
limit = limit < size ? limit : size;
if (dom->next_bit >= limit)
dom->next_bit = 0;
address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
0 , boundary_size, 0);
if (address == -1)
address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
0, boundary_size, 0);
if (likely(address != -1)) {
dom->next_bit = address + pages;
address <<= PAGE_SHIFT;
} else
address = bad_dma_address;
WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
return address;
}
static void dma_ops_free_addresses(struct dma_ops_domain *dom,
unsigned long address,
unsigned int pages)
{
address >>= PAGE_SHIFT;
iommu_area_free(dom->bitmap, address, pages);
}
static u16 domain_id_alloc(void)
{
unsigned long flags;
int id;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
BUG_ON(id == 0);
if (id > 0 && id < MAX_DOMAIN_ID)
__set_bit(id, amd_iommu_pd_alloc_bitmap);
else
id = 0;
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
return id;
}
static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
unsigned long start_page,
unsigned int pages)
{
unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
if (start_page + pages > last_page)
pages = last_page - start_page;
set_bit_string(dom->bitmap, start_page, pages);
}
static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
{
int i, j;
u64 *p1, *p2, *p3;
p1 = dma_dom->domain.pt_root;
if (!p1)
return;
for (i = 0; i < 512; ++i) {
if (!IOMMU_PTE_PRESENT(p1[i]))
continue;
p2 = IOMMU_PTE_PAGE(p1[i]);
for (j = 0; j < 512; ++i) {
if (!IOMMU_PTE_PRESENT(p2[j]))
continue;
p3 = IOMMU_PTE_PAGE(p2[j]);
free_page((unsigned long)p3);
}
free_page((unsigned long)p2);
}
free_page((unsigned long)p1);
}
static void dma_ops_domain_free(struct dma_ops_domain *dom)
{
if (!dom)
return;
dma_ops_free_pagetable(dom);
kfree(dom->pte_pages);
kfree(dom->bitmap);
kfree(dom);
}
static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
unsigned order)
{
struct dma_ops_domain *dma_dom;
unsigned i, num_pte_pages;
u64 *l2_pde;
u64 address;
/*
* Currently the DMA aperture must be between 32 MB and 1GB in size
*/
if ((order < 25) || (order > 30))
return NULL;
dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
if (!dma_dom)
return NULL;
spin_lock_init(&dma_dom->domain.lock);
dma_dom->domain.id = domain_id_alloc();
if (dma_dom->domain.id == 0)
goto free_dma_dom;
dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.priv = dma_dom;
if (!dma_dom->domain.pt_root)
goto free_dma_dom;
dma_dom->aperture_size = (1ULL << order);
dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
GFP_KERNEL);
if (!dma_dom->bitmap)
goto free_dma_dom;
/*
* mark the first page as allocated so we never return 0 as
* a valid dma-address. So we can use 0 as error value
*/
dma_dom->bitmap[0] = 1;
dma_dom->next_bit = 0;
if (iommu->exclusion_start &&
iommu->exclusion_start < dma_dom->aperture_size) {
unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
int pages = to_pages(iommu->exclusion_start,
iommu->exclusion_length);
dma_ops_reserve_addresses(dma_dom, startpage, pages);
}
num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
GFP_KERNEL);
if (!dma_dom->pte_pages)
goto free_dma_dom;
l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
if (l2_pde == NULL)
goto free_dma_dom;
dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
for (i = 0; i < num_pte_pages; ++i) {
dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
if (!dma_dom->pte_pages[i])
goto free_dma_dom;
address = virt_to_phys(dma_dom->pte_pages[i]);
l2_pde[i] = IOMMU_L1_PDE(address);
}
return dma_dom;
free_dma_dom:
dma_ops_domain_free(dma_dom);
return NULL;
}
static struct protection_domain *domain_for_device(u16 devid)
{
struct protection_domain *dom;
unsigned long flags;
read_lock_irqsave(&amd_iommu_devtable_lock, flags);
dom = amd_iommu_pd_table[devid];
read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
return dom;
}
static void set_device_domain(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
{
unsigned long flags;
u64 pte_root = virt_to_phys(domain->pt_root);
pte_root |= (domain->mode & 0x07) << 9;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
amd_iommu_dev_table[devid].data[0] = pte_root;
amd_iommu_dev_table[devid].data[1] = pte_root >> 32;
amd_iommu_dev_table[devid].data[2] = domain->id;
amd_iommu_pd_table[devid] = domain;
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
iommu_queue_inv_dev_entry(iommu, devid);
iommu->need_sync = 1;
}
static int get_device_resources(struct device *dev,
struct amd_iommu **iommu,
struct protection_domain **domain,
u16 *bdf)
{
struct dma_ops_domain *dma_dom;
struct pci_dev *pcidev;
u16 _bdf;
BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
pcidev = to_pci_dev(dev);
_bdf = (pcidev->bus->number << 8) | pcidev->devfn;
if (_bdf >= amd_iommu_last_bdf) {
*iommu = NULL;
*domain = NULL;
*bdf = 0xffff;
return 0;
}
*bdf = amd_iommu_alias_table[_bdf];
*iommu = amd_iommu_rlookup_table[*bdf];
if (*iommu == NULL)
return 0;
dma_dom = (*iommu)->default_dom;
*domain = domain_for_device(*bdf);
if (*domain == NULL) {
*domain = &dma_dom->domain;
set_device_domain(*iommu, *domain, *bdf);
printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
"device ", (*domain)->id);
print_devid(_bdf, 1);
}
return 1;
}
static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
struct dma_ops_domain *dom,
unsigned long address,
phys_addr_t paddr,
int direction)
{
u64 *pte, __pte;
WARN_ON(address > dom->aperture_size);
paddr &= PAGE_MASK;
pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
pte += IOMMU_PTE_L0_INDEX(address);
__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
if (direction == DMA_TO_DEVICE)
__pte |= IOMMU_PTE_IR;
else if (direction == DMA_FROM_DEVICE)
__pte |= IOMMU_PTE_IW;
else if (direction == DMA_BIDIRECTIONAL)
__pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
WARN_ON(*pte);
*pte = __pte;
return (dma_addr_t)address;
}
static void dma_ops_domain_unmap(struct amd_iommu *iommu,
struct dma_ops_domain *dom,
unsigned long address)
{
u64 *pte;
if (address >= dom->aperture_size)
return;
WARN_ON(address & 0xfffULL || address > dom->aperture_size);
pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
pte += IOMMU_PTE_L0_INDEX(address);
WARN_ON(!*pte);
*pte = 0ULL;
}
static dma_addr_t __map_single(struct device *dev,
struct amd_iommu *iommu,
struct dma_ops_domain *dma_dom,
phys_addr_t paddr,
size_t size,
int dir)
{
dma_addr_t offset = paddr & ~PAGE_MASK;
dma_addr_t address, start;
unsigned int pages;
int i;
pages = to_pages(paddr, size);
paddr &= PAGE_MASK;
address = dma_ops_alloc_addresses(dev, dma_dom, pages);
if (unlikely(address == bad_dma_address))
goto out;
start = address;
for (i = 0; i < pages; ++i) {
dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
paddr += PAGE_SIZE;
start += PAGE_SIZE;
}
address += offset;
out:
return address;
}
static void __unmap_single(struct amd_iommu *iommu,
struct dma_ops_domain *dma_dom,
dma_addr_t dma_addr,
size_t size,
int dir)
{
dma_addr_t i, start;
unsigned int pages;
if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
return;
pages = to_pages(dma_addr, size);
dma_addr &= PAGE_MASK;
start = dma_addr;
for (i = 0; i < pages; ++i) {
dma_ops_domain_unmap(iommu, dma_dom, start);
start += PAGE_SIZE;
}
dma_ops_free_addresses(dma_dom, dma_addr, pages);
}
static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
size_t size, int dir)
{
unsigned long flags;
struct amd_iommu *iommu;
struct protection_domain *domain;
u16 devid;
dma_addr_t addr;
get_device_resources(dev, &iommu, &domain, &devid);
if (iommu == NULL || domain == NULL)
return (dma_addr_t)paddr;
spin_lock_irqsave(&domain->lock, flags);
addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
if (addr == bad_dma_address)
goto out;
if (iommu_has_npcache(iommu))
iommu_flush_pages(iommu, domain->id, addr, size);
if (iommu->need_sync)
iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
return addr;
}
static void unmap_single(struct device *dev, dma_addr_t dma_addr,
size_t size, int dir)
{
unsigned long flags;
struct amd_iommu *iommu;
struct protection_domain *domain;
u16 devid;
if (!get_device_resources(dev, &iommu, &domain, &devid))
return;
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
iommu_flush_pages(iommu, domain->id, dma_addr, size);
if (iommu->need_sync)
iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
int nelems, int dir)
{
struct scatterlist *s;
int i;
for_each_sg(sglist, s, nelems, i) {
s->dma_address = (dma_addr_t)sg_phys(s);
s->dma_length = s->length;
}
return nelems;
}
static int map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, int dir)
{
unsigned long flags;
struct amd_iommu *iommu;
struct protection_domain *domain;
u16 devid;
int i;
struct scatterlist *s;
phys_addr_t paddr;
int mapped_elems = 0;
get_device_resources(dev, &iommu, &domain, &devid);
if (!iommu || !domain)
return map_sg_no_iommu(dev, sglist, nelems, dir);
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
paddr = sg_phys(s);
s->dma_address = __map_single(dev, iommu, domain->priv,
paddr, s->length, dir);
if (s->dma_address) {
s->dma_length = s->length;
mapped_elems++;
} else
goto unmap;
if (iommu_has_npcache(iommu))
iommu_flush_pages(iommu, domain->id, s->dma_address,
s->dma_length);
}
if (iommu->need_sync)
iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
return mapped_elems;
unmap:
for_each_sg(sglist, s, mapped_elems, i) {
if (s->dma_address)
__unmap_single(iommu, domain->priv, s->dma_address,
s->dma_length, dir);
s->dma_address = s->dma_length = 0;
}
mapped_elems = 0;
goto out;
}
static void unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, int dir)
{
unsigned long flags;
struct amd_iommu *iommu;
struct protection_domain *domain;
struct scatterlist *s;
u16 devid;
int i;
if (!get_device_resources(dev, &iommu, &domain, &devid))
return;
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
__unmap_single(iommu, domain->priv, s->dma_address,
s->dma_length, dir);
iommu_flush_pages(iommu, domain->id, s->dma_address,
s->dma_length);
s->dma_address = s->dma_length = 0;
}
if (iommu->need_sync)
iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
static void *alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag)
{
unsigned long flags;
void *virt_addr;
struct amd_iommu *iommu;
struct protection_domain *domain;
u16 devid;
phys_addr_t paddr;
virt_addr = (void *)__get_free_pages(flag, get_order(size));
if (!virt_addr)
return 0;
memset(virt_addr, 0, size);
paddr = virt_to_phys(virt_addr);
get_device_resources(dev, &iommu, &domain, &devid);
if (!iommu || !domain) {
*dma_addr = (dma_addr_t)paddr;
return virt_addr;
}
spin_lock_irqsave(&domain->lock, flags);
*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
size, DMA_BIDIRECTIONAL);
if (*dma_addr == bad_dma_address) {
free_pages((unsigned long)virt_addr, get_order(size));
virt_addr = NULL;
goto out;
}
if (iommu_has_npcache(iommu))
iommu_flush_pages(iommu, domain->id, *dma_addr, size);
if (iommu->need_sync)
iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
return virt_addr;
}
static void free_coherent(struct device *dev, size_t size,
void *virt_addr, dma_addr_t dma_addr)
{
unsigned long flags;
struct amd_iommu *iommu;
struct protection_domain *domain;
u16 devid;
get_device_resources(dev, &iommu, &domain, &devid);
if (!iommu || !domain)
goto free_mem;
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
iommu_flush_pages(iommu, domain->id, dma_addr, size);
if (iommu->need_sync)
iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
free_mem:
free_pages((unsigned long)virt_addr, get_order(size));
}
/*
* If the driver core informs the DMA layer if a driver grabs a device
* we don't need to preallocate the protection domains anymore.
* For now we have to.
*/
void prealloc_protection_domains(void)
{
struct pci_dev *dev = NULL;
struct dma_ops_domain *dma_dom;
struct amd_iommu *iommu;
int order = amd_iommu_aperture_order;
u16 devid;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
devid = (dev->bus->number << 8) | dev->devfn;
if (devid >= amd_iommu_last_bdf)
continue;
devid = amd_iommu_alias_table[devid];
if (domain_for_device(devid))
continue;
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
continue;
dma_dom = dma_ops_domain_alloc(iommu, order);
if (!dma_dom)
continue;
init_unity_mappings_for_device(dma_dom, devid);
set_device_domain(iommu, &dma_dom->domain, devid);
printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
dma_dom->domain.id);
print_devid(devid, 1);
}
}
static struct dma_mapping_ops amd_iommu_dma_ops = {
.alloc_coherent = alloc_coherent,
.free_coherent = free_coherent,
.map_single = map_single,
.unmap_single = unmap_single,
.map_sg = map_sg,
.unmap_sg = unmap_sg,
};
int __init amd_iommu_init_dma_ops(void)
{
struct amd_iommu *iommu;
int order = amd_iommu_aperture_order;
int ret;
list_for_each_entry(iommu, &amd_iommu_list, list) {
iommu->default_dom = dma_ops_domain_alloc(iommu, order);
if (iommu->default_dom == NULL)
return -ENOMEM;
ret = iommu_init_unity_mappings(iommu);
if (ret)
goto free_domains;
}
if (amd_iommu_isolate)
prealloc_protection_domains();
iommu_detected = 1;
force_iommu = 1;
bad_dma_address = 0;
#ifdef CONFIG_GART_IOMMU
gart_iommu_aperture_disabled = 1;
gart_iommu_aperture = 0;
#endif
dma_ops = &amd_iommu_dma_ops;
return 0;
free_domains:
list_for_each_entry(iommu, &amd_iommu_list, list) {
if (iommu->default_dom)
dma_ops_domain_free(iommu->default_dom);
}
return ret;
}
/*
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <asm/pci-direct.h>
#include <asm/amd_iommu_types.h>
#include <asm/amd_iommu.h>
#include <asm/gart.h>
/*
* definitions for the ACPI scanning code
*/
#define UPDATE_LAST_BDF(x) do {\
if ((x) > amd_iommu_last_bdf) \
amd_iommu_last_bdf = (x); \
} while (0);
#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
#define PCI_BUS(x) (((x) >> 8) & 0xff)
#define IVRS_HEADER_LENGTH 48
#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
#define ACPI_IVHD_TYPE 0x10
#define ACPI_IVMD_TYPE_ALL 0x20
#define ACPI_IVMD_TYPE 0x21
#define ACPI_IVMD_TYPE_RANGE 0x22
#define IVHD_DEV_ALL 0x01
#define IVHD_DEV_SELECT 0x02
#define IVHD_DEV_SELECT_RANGE_START 0x03
#define IVHD_DEV_RANGE_END 0x04
#define IVHD_DEV_ALIAS 0x42
#define IVHD_DEV_ALIAS_RANGE 0x43
#define IVHD_DEV_EXT_SELECT 0x46
#define IVHD_DEV_EXT_SELECT_RANGE 0x47
#define IVHD_FLAG_HT_TUN_EN 0x00
#define IVHD_FLAG_PASSPW_EN 0x01
#define IVHD_FLAG_RESPASSPW_EN 0x02
#define IVHD_FLAG_ISOC_EN 0x03
#define IVMD_FLAG_EXCL_RANGE 0x08
#define IVMD_FLAG_UNITY_MAP 0x01
#define ACPI_DEVFLAG_INITPASS 0x01
#define ACPI_DEVFLAG_EXTINT 0x02
#define ACPI_DEVFLAG_NMI 0x04
#define ACPI_DEVFLAG_SYSMGT1 0x10
#define ACPI_DEVFLAG_SYSMGT2 0x20
#define ACPI_DEVFLAG_LINT0 0x40
#define ACPI_DEVFLAG_LINT1 0x80
#define ACPI_DEVFLAG_ATSDIS 0x10000000
struct ivhd_header {
u8 type;
u8 flags;
u16 length;
u16 devid;
u16 cap_ptr;
u64 mmio_phys;
u16 pci_seg;
u16 info;
u32 reserved;
} __attribute__((packed));
struct ivhd_entry {
u8 type;
u16 devid;
u8 flags;
u32 ext;
} __attribute__((packed));
struct ivmd_header {
u8 type;
u8 flags;
u16 length;
u16 devid;
u16 aux;
u64 resv;
u64 range_start;
u64 range_length;
} __attribute__((packed));
static int __initdata amd_iommu_detected;
u16 amd_iommu_last_bdf;
struct list_head amd_iommu_unity_map;
unsigned amd_iommu_aperture_order = 26;
int amd_iommu_isolate;
struct list_head amd_iommu_list;
struct dev_table_entry *amd_iommu_dev_table;
u16 *amd_iommu_alias_table;
struct amd_iommu **amd_iommu_rlookup_table;
struct protection_domain **amd_iommu_pd_table;
unsigned long *amd_iommu_pd_alloc_bitmap;
static u32 dev_table_size;
static u32 alias_table_size;
static u32 rlookup_table_size;
static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
{
u64 start = iommu->exclusion_start & PAGE_MASK;
u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
u64 entry;
if (!iommu->exclusion_start)
return;
entry = start | MMIO_EXCL_ENABLE_MASK;
memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
&entry, sizeof(entry));
entry = limit;
memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
&entry, sizeof(entry));
}
static void __init iommu_set_device_table(struct amd_iommu *iommu)
{
u32 entry;
BUG_ON(iommu->mmio_base == NULL);
entry = virt_to_phys(amd_iommu_dev_table);
entry |= (dev_table_size >> 12) - 1;
memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
&entry, sizeof(entry));
}
static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
{
u32 ctrl;
ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
ctrl |= (1 << bit);
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
{
u32 ctrl;
ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
ctrl &= ~(1 << bit);
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
void __init iommu_enable(struct amd_iommu *iommu)
{
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
print_devid(iommu->devid, 0);
printk(" cap 0x%hx\n", iommu->cap_ptr);
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}
static u8 * __init iommu_map_mmio_space(u64 address)
{
u8 *ret;
if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
return NULL;
ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
if (ret != NULL)
return ret;
release_mem_region(address, MMIO_REGION_LENGTH);
return NULL;
}
static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
{
if (iommu->mmio_base)
iounmap(iommu->mmio_base);
release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
}
static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
{
u32 cap;
cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
return 0;
}
static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
{
u8 *p = (void *)h, *end = (void *)h;
struct ivhd_entry *dev;
p += sizeof(*h);
end += h->length;
find_last_devid_on_pci(PCI_BUS(h->devid),
PCI_SLOT(h->devid),
PCI_FUNC(h->devid),
h->cap_ptr);
while (p < end) {
dev = (struct ivhd_entry *)p;
switch (dev->type) {
case IVHD_DEV_SELECT:
case IVHD_DEV_RANGE_END:
case IVHD_DEV_ALIAS:
case IVHD_DEV_EXT_SELECT:
UPDATE_LAST_BDF(dev->devid);
break;
default:
break;
}
p += 0x04 << (*p >> 6);
}
WARN_ON(p != end);
return 0;
}
static int __init find_last_devid_acpi(struct acpi_table_header *table)
{
int i;
u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
struct ivhd_header *h;
/*
* Validate checksum here so we don't need to do it when
* we actually parse the table
*/
for (i = 0; i < table->length; ++i)
checksum += p[i];
if (checksum != 0)
/* ACPI table corrupt */
return -ENODEV;
p += IVRS_HEADER_LENGTH;
end += table->length;
while (p < end) {
h = (struct ivhd_header *)p;
switch (h->type) {
case ACPI_IVHD_TYPE:
find_last_devid_from_ivhd(h);
break;
default:
break;
}
p += h->length;
}
WARN_ON(p != end);
return 0;
}
static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
{
u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
get_order(CMD_BUFFER_SIZE));
u64 entry = 0;
if (cmd_buf == NULL)
return NULL;
iommu->cmd_buf_size = CMD_BUFFER_SIZE;
memset(cmd_buf, 0, CMD_BUFFER_SIZE);
entry = (u64)virt_to_phys(cmd_buf);
entry |= MMIO_CMD_SIZE_512;
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
&entry, sizeof(entry));
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
return cmd_buf;
}
static void __init free_command_buffer(struct amd_iommu *iommu)
{
if (iommu->cmd_buf)
free_pages((unsigned long)iommu->cmd_buf,
get_order(CMD_BUFFER_SIZE));
}
static void set_dev_entry_bit(u16 devid, u8 bit)
{
int i = (bit >> 5) & 0x07;
int _bit = bit & 0x1f;
amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
}
static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
{
if (flags & ACPI_DEVFLAG_INITPASS)
set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
if (flags & ACPI_DEVFLAG_EXTINT)
set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
if (flags & ACPI_DEVFLAG_NMI)
set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
if (flags & ACPI_DEVFLAG_SYSMGT1)
set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
if (flags & ACPI_DEVFLAG_SYSMGT2)
set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
if (flags & ACPI_DEVFLAG_LINT0)
set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
if (flags & ACPI_DEVFLAG_LINT1)
set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
}
static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
{
amd_iommu_rlookup_table[devid] = iommu;
}
static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
{
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
return;
if (iommu) {
set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
iommu->exclusion_start = m->range_start;
iommu->exclusion_length = m->range_length;
}
}
static void __init init_iommu_from_pci(struct amd_iommu *iommu)
{
int bus = PCI_BUS(iommu->devid);
int dev = PCI_SLOT(iommu->devid);
int fn = PCI_FUNC(iommu->devid);
int cap_ptr = iommu->cap_ptr;
u32 range;
iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
}
static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
struct ivhd_header *h)
{
u8 *p = (u8 *)h;
u8 *end = p, flags = 0;
u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
u32 ext_flags = 0;
bool alias = 0;
struct ivhd_entry *e;
/*
* First set the recommended feature enable bits from ACPI
* into the IOMMU control registers
*/
h->flags & IVHD_FLAG_HT_TUN_EN ?
iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
h->flags & IVHD_FLAG_PASSPW_EN ?
iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
h->flags & IVHD_FLAG_RESPASSPW_EN ?
iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
h->flags & IVHD_FLAG_ISOC_EN ?
iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
iommu_feature_disable(iommu, CONTROL_ISOC_EN);
/*
* make IOMMU memory accesses cache coherent
*/
iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
/*
* Done. Now parse the device entries
*/
p += sizeof(struct ivhd_header);
end += h->length;
while (p < end) {
e = (struct ivhd_entry *)p;
switch (e->type) {
case IVHD_DEV_ALL:
for (dev_i = iommu->first_device;
dev_i <= iommu->last_device; ++dev_i)
set_dev_entry_from_acpi(dev_i, e->flags, 0);
break;
case IVHD_DEV_SELECT:
devid = e->devid;
set_dev_entry_from_acpi(devid, e->flags, 0);
break;
case IVHD_DEV_SELECT_RANGE_START:
devid_start = e->devid;
flags = e->flags;
ext_flags = 0;
alias = 0;
break;
case IVHD_DEV_ALIAS:
devid = e->devid;
devid_to = e->ext >> 8;
set_dev_entry_from_acpi(devid, e->flags, 0);
amd_iommu_alias_table[devid] = devid_to;
break;
case IVHD_DEV_ALIAS_RANGE:
devid_start = e->devid;
flags = e->flags;
devid_to = e->ext >> 8;
ext_flags = 0;
alias = 1;
break;
case IVHD_DEV_EXT_SELECT:
devid = e->devid;
set_dev_entry_from_acpi(devid, e->flags, e->ext);
break;
case IVHD_DEV_EXT_SELECT_RANGE:
devid_start = e->devid;
flags = e->flags;
ext_flags = e->ext;
alias = 0;
break;
case IVHD_DEV_RANGE_END:
devid = e->devid;
for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
if (alias)
amd_iommu_alias_table[dev_i] = devid_to;
set_dev_entry_from_acpi(
amd_iommu_alias_table[dev_i],
flags, ext_flags);
}
break;
default:
break;
}
p += 0x04 << (e->type >> 6);
}
}
static int __init init_iommu_devices(struct amd_iommu *iommu)
{
u16 i;
for (i = iommu->first_device; i <= iommu->last_device; ++i)
set_iommu_for_device(iommu, i);
return 0;
}
static void __init free_iommu_one(struct amd_iommu *iommu)
{
free_command_buffer(iommu);
iommu_unmap_mmio_space(iommu);
}
static void __init free_iommu_all(void)
{
struct amd_iommu *iommu, *next;
list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
list_del(&iommu->list);
free_iommu_one(iommu);
kfree(iommu);
}
}
static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
{
spin_lock_init(&iommu->lock);
list_add_tail(&iommu->list, &amd_iommu_list);
/*
* Copy data from ACPI table entry to the iommu struct
*/
iommu->devid = h->devid;
iommu->cap_ptr = h->cap_ptr;
iommu->mmio_phys = h->mmio_phys;
iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
if (!iommu->mmio_base)
return -ENOMEM;
iommu_set_device_table(iommu);
iommu->cmd_buf = alloc_command_buffer(iommu);
if (!iommu->cmd_buf)
return -ENOMEM;
init_iommu_from_pci(iommu);
init_iommu_from_acpi(iommu, h);
init_iommu_devices(iommu);
return 0;
}
static int __init init_iommu_all(struct acpi_table_header *table)
{
u8 *p = (u8 *)table, *end = (u8 *)table;
struct ivhd_header *h;
struct amd_iommu *iommu;
int ret;
INIT_LIST_HEAD(&amd_iommu_list);
end += table->length;
p += IVRS_HEADER_LENGTH;
while (p < end) {
h = (struct ivhd_header *)p;
switch (*p) {
case ACPI_IVHD_TYPE:
iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
if (iommu == NULL)
return -ENOMEM;
ret = init_iommu_one(iommu, h);
if (ret)
return ret;
break;
default:
break;
}
p += h->length;
}
WARN_ON(p != end);
return 0;
}
static void __init free_unity_maps(void)
{
struct unity_map_entry *entry, *next;
list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
list_del(&entry->list);
kfree(entry);
}
}
static int __init init_exclusion_range(struct ivmd_header *m)
{
int i;
switch (m->type) {
case ACPI_IVMD_TYPE:
set_device_exclusion_range(m->devid, m);
break;
case ACPI_IVMD_TYPE_ALL:
for (i = 0; i < amd_iommu_last_bdf; ++i)
set_device_exclusion_range(i, m);
break;
case ACPI_IVMD_TYPE_RANGE:
for (i = m->devid; i <= m->aux; ++i)
set_device_exclusion_range(i, m);
break;
default:
break;
}
return 0;
}
static int __init init_unity_map_range(struct ivmd_header *m)
{
struct unity_map_entry *e = 0;
e = kzalloc(sizeof(*e), GFP_KERNEL);
if (e == NULL)
return -ENOMEM;
switch (m->type) {
default:
case ACPI_IVMD_TYPE:
e->devid_start = e->devid_end = m->devid;
break;
case ACPI_IVMD_TYPE_ALL:
e->devid_start = 0;
e->devid_end = amd_iommu_last_bdf;
break;
case ACPI_IVMD_TYPE_RANGE:
e->devid_start = m->devid;
e->devid_end = m->aux;
break;
}
e->address_start = PAGE_ALIGN(m->range_start);
e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
e->prot = m->flags >> 1;
list_add_tail(&e->list, &amd_iommu_unity_map);
return 0;
}
static int __init init_memory_definitions(struct acpi_table_header *table)
{
u8 *p = (u8 *)table, *end = (u8 *)table;
struct ivmd_header *m;
INIT_LIST_HEAD(&amd_iommu_unity_map);
end += table->length;
p += IVRS_HEADER_LENGTH;
while (p < end) {
m = (struct ivmd_header *)p;
if (m->flags & IVMD_FLAG_EXCL_RANGE)
init_exclusion_range(m);
else if (m->flags & IVMD_FLAG_UNITY_MAP)
init_unity_map_range(m);
p += m->length;
}
return 0;
}
static void __init enable_iommus(void)
{
struct amd_iommu *iommu;
list_for_each_entry(iommu, &amd_iommu_list, list) {
iommu_set_exclusion_range(iommu);
iommu_enable(iommu);
}
}
/*
* Suspend/Resume support
* disable suspend until real resume implemented
*/
static int amd_iommu_resume(struct sys_device *dev)
{
return 0;
}
static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
{
return -EINVAL;
}
static struct sysdev_class amd_iommu_sysdev_class = {
.name = "amd_iommu",
.suspend = amd_iommu_suspend,
.resume = amd_iommu_resume,
};
static struct sys_device device_amd_iommu = {
.id = 0,
.cls = &amd_iommu_sysdev_class,
};
int __init amd_iommu_init(void)
{
int i, ret = 0;
if (no_iommu) {
printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
return 0;
}
if (!amd_iommu_detected)
return -ENODEV;
/*
* First parse ACPI tables to find the largest Bus/Dev/Func
* we need to handle. Upon this information the shared data
* structures for the IOMMUs in the system will be allocated
*/
if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
return -ENODEV;
dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
ret = -ENOMEM;
/* Device table - directly used by all IOMMUs */
amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
get_order(dev_table_size));
if (amd_iommu_dev_table == NULL)
goto out;
/*
* Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
* IOMMU see for that device
*/
amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
get_order(alias_table_size));
if (amd_iommu_alias_table == NULL)
goto free;
/* IOMMU rlookup table - find the IOMMU for a specific device */
amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
get_order(rlookup_table_size));
if (amd_iommu_rlookup_table == NULL)
goto free;
/*
* Protection Domain table - maps devices to protection domains
* This table has the same size as the rlookup_table
*/
amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
get_order(rlookup_table_size));
if (amd_iommu_pd_table == NULL)
goto free;
amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
get_order(MAX_DOMAIN_ID/8));
if (amd_iommu_pd_alloc_bitmap == NULL)
goto free;
/*
* memory is allocated now; initialize the device table with all zeroes
* and let all alias entries point to itself
*/
memset(amd_iommu_dev_table, 0, dev_table_size);
for (i = 0; i < amd_iommu_last_bdf; ++i)
amd_iommu_alias_table[i] = i;
memset(amd_iommu_pd_table, 0, rlookup_table_size);
memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
/*
* never allocate domain 0 because its used as the non-allocated and
* error value placeholder
*/
amd_iommu_pd_alloc_bitmap[0] = 1;
/*
* now the data structures are allocated and basically initialized
* start the real acpi table scan
*/
ret = -ENODEV;
if (acpi_table_parse("IVRS", init_iommu_all) != 0)
goto free;
if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
goto free;
ret = amd_iommu_init_dma_ops();
if (ret)
goto free;
ret = sysdev_class_register(&amd_iommu_sysdev_class);
if (ret)
goto free;
ret = sysdev_register(&device_amd_iommu);
if (ret)
goto free;
enable_iommus();
printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
(1 << (amd_iommu_aperture_order-20)));
printk(KERN_INFO "AMD IOMMU: device isolation ");
if (amd_iommu_isolate)
printk("enabled\n");
else
printk("disabled\n");
out:
return ret;
free:
if (amd_iommu_pd_alloc_bitmap)
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
if (amd_iommu_pd_table)
free_pages((unsigned long)amd_iommu_pd_table,
get_order(rlookup_table_size));
if (amd_iommu_rlookup_table)
free_pages((unsigned long)amd_iommu_rlookup_table,
get_order(rlookup_table_size));
if (amd_iommu_alias_table)
free_pages((unsigned long)amd_iommu_alias_table,
get_order(alias_table_size));
if (amd_iommu_dev_table)
free_pages((unsigned long)amd_iommu_dev_table,
get_order(dev_table_size));
free_iommu_all();
free_unity_maps();
goto out;
}
static int __init early_amd_iommu_detect(struct acpi_table_header *table)
{
return 0;
}
void __init amd_iommu_detect(void)
{
if (swiotlb || no_iommu || iommu_detected)
return;
if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
iommu_detected = 1;
amd_iommu_detected = 1;
#ifdef CONFIG_GART_IOMMU
gart_iommu_aperture_disabled = 1;
gart_iommu_aperture = 0;
#endif
}
}
static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
if (strcmp(str, "isolate") == 0)
amd_iommu_isolate = 1;
}
return 1;
}
static int __init parse_amd_iommu_size_options(char *str)
{
for (; *str; ++str) {
if (strcmp(str, "32M") == 0)
amd_iommu_aperture_order = 25;
if (strcmp(str, "64M") == 0)
amd_iommu_aperture_order = 26;
if (strcmp(str, "128M") == 0)
amd_iommu_aperture_order = 27;
if (strcmp(str, "256M") == 0)
amd_iommu_aperture_order = 28;
if (strcmp(str, "512M") == 0)
amd_iommu_aperture_order = 29;
if (strcmp(str, "1G") == 0)
amd_iommu_aperture_order = 30;
}
return 1;
}
__setup("amd_iommu=", parse_amd_iommu_options);
__setup("amd_iommu_size=", parse_amd_iommu_size_options);
...@@ -925,11 +925,11 @@ error_kernelspace: ...@@ -925,11 +925,11 @@ error_kernelspace:
iret run with kernel gs again, so don't set the user space flag. iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report an truncated RIP for IRET B stepping K8s sometimes report an truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */ exceptions returning to compat mode. Check for these here too. */
leaq irq_return(%rip),%rbp leaq irq_return(%rip),%rcx
cmpq %rbp,RIP(%rsp) cmpq %rcx,RIP(%rsp)
je error_swapgs je error_swapgs
movl %ebp,%ebp /* zero extend */ movl %ecx,%ecx /* zero extend */
cmpq %rbp,RIP(%rsp) cmpq %rcx,RIP(%rsp)
je error_swapgs je error_swapgs
cmpq $gs_change,RIP(%rsp) cmpq $gs_change,RIP(%rsp)
je error_swapgs je error_swapgs
......
...@@ -162,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, ...@@ -162,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
int ret; int ret;
if (!cpu_has_fxsr) if (!cpu_has_fxsr)
return -EIO; return -ENODEV;
ret = init_fpu(target); ret = init_fpu(target);
if (ret) if (ret)
...@@ -179,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, ...@@ -179,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
int ret; int ret;
if (!cpu_has_fxsr) if (!cpu_has_fxsr)
return -EIO; return -ENODEV;
ret = init_fpu(target); ret = init_fpu(target);
if (ret) if (ret)
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <asm/dma.h> #include <asm/dma.h>
#include <asm/gart.h> #include <asm/gart.h>
#include <asm/calgary.h> #include <asm/calgary.h>
#include <asm/amd_iommu.h>
int forbid_dac __read_mostly; int forbid_dac __read_mostly;
EXPORT_SYMBOL(forbid_dac); EXPORT_SYMBOL(forbid_dac);
...@@ -123,6 +124,8 @@ void __init pci_iommu_alloc(void) ...@@ -123,6 +124,8 @@ void __init pci_iommu_alloc(void)
detect_intel_iommu(); detect_intel_iommu();
amd_iommu_detect();
#ifdef CONFIG_SWIOTLB #ifdef CONFIG_SWIOTLB
pci_swiotlb_init(); pci_swiotlb_init();
#endif #endif
...@@ -503,6 +506,8 @@ static int __init pci_iommu_init(void) ...@@ -503,6 +506,8 @@ static int __init pci_iommu_init(void)
intel_iommu_init(); intel_iommu_init();
amd_iommu_init();
#ifdef CONFIG_GART_IOMMU #ifdef CONFIG_GART_IOMMU
gart_iommu_init(); gart_iommu_init();
#endif #endif
......
...@@ -943,13 +943,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ...@@ -943,13 +943,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
return copy_regset_to_user(child, &user_x86_32_view, return copy_regset_to_user(child, &user_x86_32_view,
REGSET_XFP, REGSET_XFP,
0, sizeof(struct user_fxsr_struct), 0, sizeof(struct user_fxsr_struct),
datap); datap) ? -EIO : 0;
case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
return copy_regset_from_user(child, &user_x86_32_view, return copy_regset_from_user(child, &user_x86_32_view,
REGSET_XFP, REGSET_XFP,
0, sizeof(struct user_fxsr_struct), 0, sizeof(struct user_fxsr_struct),
datap); datap) ? -EIO : 0;
#endif #endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
......
...@@ -123,6 +123,8 @@ void __init time_init(void) ...@@ -123,6 +123,8 @@ void __init time_init(void)
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
cpu_khz = calculate_cpu_khz(); cpu_khz = calculate_cpu_khz();
lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ;
if (unsynchronized_tsc()) if (unsynchronized_tsc())
mark_tsc_unstable("TSCs unsynchronized"); mark_tsc_unstable("TSCs unsynchronized");
......
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/clocksource.h> #include <linux/clocksource.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/cpufreq.h> #include <linux/cpufreq.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/init.h> #include <linux/init.h>
...@@ -403,6 +404,7 @@ static inline void check_geode_tsc_reliable(void) { } ...@@ -403,6 +404,7 @@ static inline void check_geode_tsc_reliable(void) { }
void __init tsc_init(void) void __init tsc_init(void)
{ {
int cpu; int cpu;
u64 lpj;
if (!cpu_has_tsc || tsc_disabled > 0) if (!cpu_has_tsc || tsc_disabled > 0)
return; return;
...@@ -415,6 +417,10 @@ void __init tsc_init(void) ...@@ -415,6 +417,10 @@ void __init tsc_init(void)
return; return;
} }
lpj = ((u64)tsc_khz * 1000);
do_div(lpj, HZ);
lpj_fine = lpj;
/* now allow native_sched_clock() to use rdtsc */ /* now allow native_sched_clock() to use rdtsc */
tsc_disabled = 0; tsc_disabled = 0;
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
* *
* Copyright (C) 1993 Linus Torvalds * Copyright (C) 1993 Linus Torvalds
* Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
* Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
* *
* The __delay function must _NOT_ be inlined as its execution time * The __delay function must _NOT_ be inlined as its execution time
* depends wildly on alignment on many x86 processors. The additional * depends wildly on alignment on many x86 processors. The additional
...@@ -28,16 +29,22 @@ ...@@ -28,16 +29,22 @@
/* simple loop based delay: */ /* simple loop based delay: */
static void delay_loop(unsigned long loops) static void delay_loop(unsigned long loops)
{ {
int d0;
__asm__ __volatile__( __asm__ __volatile__(
"\tjmp 1f\n" " test %0,%0 \n"
".align 16\n" " jz 3f \n"
"1:\tjmp 2f\n" " jmp 1f \n"
".align 16\n"
"2:\tdecl %0\n\tjns 2b" ".align 16 \n"
:"=&a" (d0) "1: jmp 2f \n"
:"0" (loops));
".align 16 \n"
"2: decl %0 \n"
" jnz 2b \n"
"3: decl %0 \n"
: /* we don't need output */
:"a" (loops)
);
} }
/* TSC based delay: */ /* TSC based delay: */
......
...@@ -392,11 +392,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, ...@@ -392,11 +392,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(KERN_CONT "NULL pointer dereference"); printk(KERN_CONT "NULL pointer dereference");
else else
printk(KERN_CONT "paging request"); printk(KERN_CONT "paging request");
#ifdef CONFIG_X86_32 printk(KERN_CONT " at %p\n", (void *) address);
printk(KERN_CONT " at %08lx\n", address);
#else
printk(KERN_CONT " at %016lx\n", address);
#endif
printk(KERN_ALERT "IP:"); printk(KERN_ALERT "IP:");
printk_address(regs->ip, 1); printk_address(regs->ip, 1);
dump_pagetable(address); dump_pagetable(address);
...@@ -796,14 +792,10 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -796,14 +792,10 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
printk_ratelimit()) { printk_ratelimit()) {
printk( printk(
#ifdef CONFIG_X86_32 "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
#else
"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
#endif
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address, regs->ip, tsk->comm, task_pid_nr(tsk), address,
regs->sp, error_code); (void *) regs->ip, (void *) regs->sp, error_code);
print_vma_addr(" in ", regs->ip); print_vma_addr(" in ", regs->ip);
printk("\n"); printk("\n");
} }
......
...@@ -269,12 +269,13 @@ static void nmi_cpu_shutdown(void *dummy) ...@@ -269,12 +269,13 @@ static void nmi_cpu_shutdown(void *dummy)
static void nmi_shutdown(void) static void nmi_shutdown(void)
{ {
struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
nmi_enabled = 0; nmi_enabled = 0;
on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
unregister_die_notifier(&profile_exceptions_nb); unregister_die_notifier(&profile_exceptions_nb);
model->shutdown(msrs); model->shutdown(msrs);
free_msrs(); free_msrs();
put_cpu_var(cpu_msrs);
} }
static void nmi_cpu_start(void *dummy) static void nmi_cpu_start(void *dummy)
......
/*
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_X86_AMD_IOMMU_H
#define _ASM_X86_AMD_IOMMU_H
#ifdef CONFIG_AMD_IOMMU
extern int amd_iommu_init(void);
extern int amd_iommu_init_dma_ops(void);
extern void amd_iommu_detect(void);
#else
static inline int amd_iommu_init(void) { return -ENODEV; }
static inline void amd_iommu_detect(void) { }
#endif
#endif
/*
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __AMD_IOMMU_TYPES_H__
#define __AMD_IOMMU_TYPES_H__
#include <linux/types.h>
#include <linux/list.h>
#include <linux/spinlock.h>
/*
* some size calculation constants
*/
#define DEV_TABLE_ENTRY_SIZE 256
#define ALIAS_TABLE_ENTRY_SIZE 2
#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))
/* helper macros */
#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
#define HIGH_U32(x) (LOW_U32((x) >> 32))
/* Length of the MMIO region for the AMD IOMMU */
#define MMIO_REGION_LENGTH 0x4000
/* Capability offsets used by the driver */
#define MMIO_CAP_HDR_OFFSET 0x00
#define MMIO_RANGE_OFFSET 0x0c
/* Masks, shifts and macros to parse the device range capability */
#define MMIO_RANGE_LD_MASK 0xff000000
#define MMIO_RANGE_FD_MASK 0x00ff0000
#define MMIO_RANGE_BUS_MASK 0x0000ff00
#define MMIO_RANGE_LD_SHIFT 24
#define MMIO_RANGE_FD_SHIFT 16
#define MMIO_RANGE_BUS_SHIFT 8
#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT)
#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT)
#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT)
/* Flag masks for the AMD IOMMU exclusion range */
#define MMIO_EXCL_ENABLE_MASK 0x01ULL
#define MMIO_EXCL_ALLOW_MASK 0x02ULL
/* Used offsets into the MMIO space */
#define MMIO_DEV_TABLE_OFFSET 0x0000
#define MMIO_CMD_BUF_OFFSET 0x0008
#define MMIO_EVT_BUF_OFFSET 0x0010
#define MMIO_CONTROL_OFFSET 0x0018
#define MMIO_EXCL_BASE_OFFSET 0x0020
#define MMIO_EXCL_LIMIT_OFFSET 0x0028
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
#define MMIO_EVT_TAIL_OFFSET 0x2018
#define MMIO_STATUS_OFFSET 0x2020
/* feature control bits */
#define CONTROL_IOMMU_EN 0x00ULL
#define CONTROL_HT_TUN_EN 0x01ULL
#define CONTROL_EVT_LOG_EN 0x02ULL
#define CONTROL_EVT_INT_EN 0x03ULL
#define CONTROL_COMWAIT_EN 0x04ULL
#define CONTROL_PASSPW_EN 0x08ULL
#define CONTROL_RESPASSPW_EN 0x09ULL
#define CONTROL_COHERENT_EN 0x0aULL
#define CONTROL_ISOC_EN 0x0bULL
#define CONTROL_CMDBUF_EN 0x0cULL
#define CONTROL_PPFLOG_EN 0x0dULL
#define CONTROL_PPFINT_EN 0x0eULL
/* command specific defines */
#define CMD_COMPL_WAIT 0x01
#define CMD_INV_DEV_ENTRY 0x02
#define CMD_INV_IOMMU_PAGES 0x03
#define CMD_COMPL_WAIT_STORE_MASK 0x01
#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
/* macros and definitions for device table entries */
#define DEV_ENTRY_VALID 0x00
#define DEV_ENTRY_TRANSLATION 0x01
#define DEV_ENTRY_IR 0x3d
#define DEV_ENTRY_IW 0x3e
#define DEV_ENTRY_EX 0x67
#define DEV_ENTRY_SYSMGT1 0x68
#define DEV_ENTRY_SYSMGT2 0x69
#define DEV_ENTRY_INIT_PASS 0xb8
#define DEV_ENTRY_EINT_PASS 0xb9
#define DEV_ENTRY_NMI_PASS 0xba
#define DEV_ENTRY_LINT0_PASS 0xbe
#define DEV_ENTRY_LINT1_PASS 0xbf
/* constants to configure the command buffer */
#define CMD_BUFFER_SIZE 8192
#define CMD_BUFFER_ENTRIES 512
#define MMIO_CMD_SIZE_SHIFT 56
#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
#define PAGE_MODE_1_LEVEL 0x01
#define PAGE_MODE_2_LEVEL 0x02
#define PAGE_MODE_3_LEVEL 0x03
#define IOMMU_PDE_NL_0 0x000ULL
#define IOMMU_PDE_NL_1 0x200ULL
#define IOMMU_PDE_NL_2 0x400ULL
#define IOMMU_PDE_NL_3 0x600ULL
#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL)
#define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL)
#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL)
#define IOMMU_MAP_SIZE_L1 (1ULL << 21)
#define IOMMU_MAP_SIZE_L2 (1ULL << 30)
#define IOMMU_MAP_SIZE_L3 (1ULL << 39)
#define IOMMU_PTE_P (1ULL << 0)
#define IOMMU_PTE_U (1ULL << 59)
#define IOMMU_PTE_FC (1ULL << 60)
#define IOMMU_PTE_IR (1ULL << 61)
#define IOMMU_PTE_IW (1ULL << 62)
#define IOMMU_L1_PDE(address) \
((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
#define IOMMU_L2_PDE(address) \
((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
#define IOMMU_PROT_MASK 0x03
#define IOMMU_PROT_IR 0x01
#define IOMMU_PROT_IW 0x02
/* IOMMU capabilities */
#define IOMMU_CAP_IOTLB 24
#define IOMMU_CAP_NPCACHE 26
#define MAX_DOMAIN_ID 65536
struct protection_domain {
spinlock_t lock;
u16 id;
int mode;
u64 *pt_root;
void *priv;
};
struct dma_ops_domain {
struct list_head list;
struct protection_domain domain;
unsigned long aperture_size;
unsigned long next_bit;
unsigned long *bitmap;
u64 **pte_pages;
};
struct amd_iommu {
struct list_head list;
spinlock_t lock;
u16 devid;
u16 cap_ptr;
u64 mmio_phys;
u8 *mmio_base;
u32 cap;
u16 first_device;
u16 last_device;
u64 exclusion_start;
u64 exclusion_length;
u8 *cmd_buf;
u32 cmd_buf_size;
int need_sync;
struct dma_ops_domain *default_dom;
};
extern struct list_head amd_iommu_list;
struct dev_table_entry {
u32 data[8];
};
struct unity_map_entry {
struct list_head list;
u16 devid_start;
u16 devid_end;
u64 address_start;
u64 address_end;
int prot;
};
extern struct list_head amd_iommu_unity_map;
/* data structures for device handling */
extern struct dev_table_entry *amd_iommu_dev_table;
extern u16 *amd_iommu_alias_table;
extern struct amd_iommu **amd_iommu_rlookup_table;
extern unsigned amd_iommu_aperture_order;
extern u16 amd_iommu_last_bdf;
/* data structures for protection domain handling */
extern struct protection_domain **amd_iommu_pd_table;
extern unsigned long *amd_iommu_pd_alloc_bitmap;
extern int amd_iommu_isolate;
static inline void print_devid(u16 devid, int nl)
{
int bus = devid >> 8;
int dev = devid >> 3 & 0x1f;
int fn = devid & 0x07;
printk("%02x:%02x.%x", bus, dev, fn);
if (nl)
printk("\n");
}
#endif
...@@ -106,6 +106,7 @@ ...@@ -106,6 +106,7 @@
/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ #define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ #define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
#define X86_FEATURE_IBS (6*32+ 10) /* Instruction Based Sampling */
/* /*
* Auxiliary flags: Linux defined - For features scattered in various * Auxiliary flags: Linux defined - For features scattered in various
......
...@@ -22,8 +22,9 @@ extern int gart_iommu_aperture_allowed; ...@@ -22,8 +22,9 @@ extern int gart_iommu_aperture_allowed;
extern int gart_iommu_aperture_disabled; extern int gart_iommu_aperture_disabled;
extern int fix_aperture; extern int fix_aperture;
#else #else
#define gart_iommu_aperture 0 #define gart_iommu_aperture 0
#define gart_iommu_aperture_allowed 0 #define gart_iommu_aperture_allowed 0
#define gart_iommu_aperture_disabled 1
static inline void early_gart_iommu_check(void) static inline void early_gart_iommu_check(void)
{ {
......
...@@ -3,6 +3,62 @@ ...@@ -3,6 +3,62 @@
#define ARCH_HAS_IOREMAP_WC #define ARCH_HAS_IOREMAP_WC
#include <linux/compiler.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
{ type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \
:"m" (*(volatile type __force *)addr) barrier); return ret; }
#define build_mmio_write(name, size, type, reg, barrier) \
static inline void name(type val, volatile void __iomem *addr) \
{ asm volatile("mov" size " %0,%1": :reg (val), \
"m" (*(volatile type __force *)addr) barrier); }
build_mmio_read(readb, "b", unsigned char, "q", :"memory")
build_mmio_read(readw, "w", unsigned short, "r", :"memory")
build_mmio_read(readl, "l", unsigned int, "r", :"memory")
build_mmio_read(__readb, "b", unsigned char, "q", )
build_mmio_read(__readw, "w", unsigned short, "r", )
build_mmio_read(__readl, "l", unsigned int, "r", )
build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
build_mmio_write(writew, "w", unsigned short, "r", :"memory")
build_mmio_write(writel, "l", unsigned int, "r", :"memory")
build_mmio_write(__writeb, "b", unsigned char, "q", )
build_mmio_write(__writew, "w", unsigned short, "r", )
build_mmio_write(__writel, "l", unsigned int, "r", )
#define readb_relaxed(a) __readb(a)
#define readw_relaxed(a) __readw(a)
#define readl_relaxed(a) __readl(a)
#define __raw_readb __readb
#define __raw_readw __readw
#define __raw_readl __readl
#define __raw_writeb __writeb
#define __raw_writew __writew
#define __raw_writel __writel
#define mmiowb() barrier()
#ifdef CONFIG_X86_64
build_mmio_read(readq, "q", unsigned long, "r", :"memory")
build_mmio_read(__readq, "q", unsigned long, "r", )
build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
build_mmio_write(__writeq, "q", unsigned long, "r", )
#define readq_relaxed(a) __readq(a)
#define __raw_readq __readq
#define __raw_writeq writeq
/* Let people know we have them */
#define readq readq
#define writeq writeq
#endif
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
# include "io_32.h" # include "io_32.h"
#else #else
......
...@@ -149,55 +149,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); ...@@ -149,55 +149,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
#define virt_to_bus virt_to_phys #define virt_to_bus virt_to_phys
#define bus_to_virt phys_to_virt #define bus_to_virt phys_to_virt
/*
* readX/writeX() are used to access memory mapped devices. On some
* architectures the memory mapped IO stuff needs to be accessed
* differently. On the x86 architecture, we just read/write the
* memory location directly.
*/
static inline unsigned char readb(const volatile void __iomem *addr)
{
return *(volatile unsigned char __force *)addr;
}
static inline unsigned short readw(const volatile void __iomem *addr)
{
return *(volatile unsigned short __force *)addr;
}
static inline unsigned int readl(const volatile void __iomem *addr)
{
return *(volatile unsigned int __force *) addr;
}
#define readb_relaxed(addr) readb(addr)
#define readw_relaxed(addr) readw(addr)
#define readl_relaxed(addr) readl(addr)
#define __raw_readb readb
#define __raw_readw readw
#define __raw_readl readl
static inline void writeb(unsigned char b, volatile void __iomem *addr)
{
*(volatile unsigned char __force *)addr = b;
}
static inline void writew(unsigned short b, volatile void __iomem *addr)
{
*(volatile unsigned short __force *)addr = b;
}
static inline void writel(unsigned int b, volatile void __iomem *addr)
{
*(volatile unsigned int __force *)addr = b;
}
#define __raw_writeb writeb
#define __raw_writew writew
#define __raw_writel writel
#define mmiowb()
static inline void static inline void
memset_io(volatile void __iomem *addr, unsigned char val, int count) memset_io(volatile void __iomem *addr, unsigned char val, int count)
{ {
......
...@@ -204,77 +204,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); ...@@ -204,77 +204,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
#define virt_to_bus virt_to_phys #define virt_to_bus virt_to_phys
#define bus_to_virt phys_to_virt #define bus_to_virt phys_to_virt
/*
* readX/writeX() are used to access memory mapped devices. On some
* architectures the memory mapped IO stuff needs to be accessed
* differently. On the x86 architecture, we just read/write the
* memory location directly.
*/
static inline __u8 __readb(const volatile void __iomem *addr)
{
return *(__force volatile __u8 *)addr;
}
static inline __u16 __readw(const volatile void __iomem *addr)
{
return *(__force volatile __u16 *)addr;
}
static __always_inline __u32 __readl(const volatile void __iomem *addr)
{
return *(__force volatile __u32 *)addr;
}
static inline __u64 __readq(const volatile void __iomem *addr)
{
return *(__force volatile __u64 *)addr;
}
#define readb(x) __readb(x)
#define readw(x) __readw(x)
#define readl(x) __readl(x)
#define readq(x) __readq(x)
#define readb_relaxed(a) readb(a)
#define readw_relaxed(a) readw(a)
#define readl_relaxed(a) readl(a)
#define readq_relaxed(a) readq(a)
#define __raw_readb readb
#define __raw_readw readw
#define __raw_readl readl
#define __raw_readq readq
#define mmiowb()
static inline void __writel(__u32 b, volatile void __iomem *addr)
{
*(__force volatile __u32 *)addr = b;
}
static inline void __writeq(__u64 b, volatile void __iomem *addr)
{
*(__force volatile __u64 *)addr = b;
}
static inline void __writeb(__u8 b, volatile void __iomem *addr)
{
*(__force volatile __u8 *)addr = b;
}
static inline void __writew(__u16 b, volatile void __iomem *addr)
{
*(__force volatile __u16 *)addr = b;
}
#define writeq(val, addr) __writeq((val), (addr))
#define writel(val, addr) __writel((val), (addr))
#define writew(val, addr) __writew((val), (addr))
#define writeb(val, addr) __writeb((val), (addr))
#define __raw_writeb writeb
#define __raw_writew writew
#define __raw_writel writel
#define __raw_writeq writeq
void __memcpy_fromio(void *, unsigned long, unsigned); void __memcpy_fromio(void *, unsigned long, unsigned);
void __memcpy_toio(unsigned long, const void *, unsigned); void __memcpy_toio(unsigned long, const void *, unsigned);
......
...@@ -41,6 +41,7 @@ static inline void ndelay(unsigned long x) ...@@ -41,6 +41,7 @@ static inline void ndelay(unsigned long x)
#define ndelay(x) ndelay(x) #define ndelay(x) ndelay(x)
#endif #endif
extern unsigned long lpj_fine;
void calibrate_delay(void); void calibrate_delay(void);
void msleep(unsigned int msecs); void msleep(unsigned int msecs);
unsigned long msleep_interruptible(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs);
......
...@@ -8,7 +8,9 @@ ...@@ -8,7 +8,9 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/timex.h> #include <linux/timex.h>
#include <linux/smp.h>
unsigned long lpj_fine;
unsigned long preset_lpj; unsigned long preset_lpj;
static int __init lpj_setup(char *str) static int __init lpj_setup(char *str)
{ {
...@@ -33,9 +35,9 @@ static unsigned long __cpuinit calibrate_delay_direct(void) ...@@ -33,9 +35,9 @@ static unsigned long __cpuinit calibrate_delay_direct(void)
unsigned long pre_start, start, post_start; unsigned long pre_start, start, post_start;
unsigned long pre_end, end, post_end; unsigned long pre_end, end, post_end;
unsigned long start_jiffies; unsigned long start_jiffies;
unsigned long tsc_rate_min, tsc_rate_max; unsigned long timer_rate_min, timer_rate_max;
unsigned long good_tsc_sum = 0; unsigned long good_timer_sum = 0;
unsigned long good_tsc_count = 0; unsigned long good_timer_count = 0;
int i; int i;
if (read_current_timer(&pre_start) < 0 ) if (read_current_timer(&pre_start) < 0 )
...@@ -79,22 +81,24 @@ static unsigned long __cpuinit calibrate_delay_direct(void) ...@@ -79,22 +81,24 @@ static unsigned long __cpuinit calibrate_delay_direct(void)
} }
read_current_timer(&post_end); read_current_timer(&post_end);
tsc_rate_max = (post_end - pre_start) / DELAY_CALIBRATION_TICKS; timer_rate_max = (post_end - pre_start) /
tsc_rate_min = (pre_end - post_start) / DELAY_CALIBRATION_TICKS; DELAY_CALIBRATION_TICKS;
timer_rate_min = (pre_end - post_start) /
DELAY_CALIBRATION_TICKS;
/* /*
* If the upper limit and lower limit of the tsc_rate is * If the upper limit and lower limit of the timer_rate is
* >= 12.5% apart, redo calibration. * >= 12.5% apart, redo calibration.
*/ */
if (pre_start != 0 && pre_end != 0 && if (pre_start != 0 && pre_end != 0 &&
(tsc_rate_max - tsc_rate_min) < (tsc_rate_max >> 3)) { (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
good_tsc_count++; good_timer_count++;
good_tsc_sum += tsc_rate_max; good_timer_sum += timer_rate_max;
} }
} }
if (good_tsc_count) if (good_timer_count)
return (good_tsc_sum/good_tsc_count); return (good_timer_sum/good_timer_count);
printk(KERN_WARNING "calibrate_delay_direct() failed to get a good " printk(KERN_WARNING "calibrate_delay_direct() failed to get a good "
"estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n"); "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n");
...@@ -108,6 +112,10 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} ...@@ -108,6 +112,10 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
* This is the number of bits of precision for the loops_per_jiffy. Each * This is the number of bits of precision for the loops_per_jiffy. Each
* bit takes on average 1.5/HZ seconds. This (like the original) is a little * bit takes on average 1.5/HZ seconds. This (like the original) is a little
* better than 1% * better than 1%
* For the boot cpu we can skip the delay calibration and assign it a value
* calculated based on the timer frequency.
* For the rest of the CPUs we cannot assume that the timer frequency is same as
* the cpu frequency, hence do the calibration for those.
*/ */
#define LPS_PREC 8 #define LPS_PREC 8
...@@ -118,20 +126,20 @@ void __cpuinit calibrate_delay(void) ...@@ -118,20 +126,20 @@ void __cpuinit calibrate_delay(void)
if (preset_lpj) { if (preset_lpj) {
loops_per_jiffy = preset_lpj; loops_per_jiffy = preset_lpj;
printk("Calibrating delay loop (skipped)... " printk(KERN_INFO
"%lu.%02lu BogoMIPS preset\n", "Calibrating delay loop (skipped) preset value.. ");
loops_per_jiffy/(500000/HZ), } else if ((smp_processor_id() == 0) && lpj_fine) {
(loops_per_jiffy/(5000/HZ)) % 100); loops_per_jiffy = lpj_fine;
printk(KERN_INFO
"Calibrating delay loop (skipped), "
"value calculated using timer frequency.. ");
} else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) { } else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) {
printk("Calibrating delay using timer specific routine.. "); printk(KERN_INFO
printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", "Calibrating delay using timer specific routine.. ");
loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100,
loops_per_jiffy);
} else { } else {
loops_per_jiffy = (1<<12); loops_per_jiffy = (1<<12);
printk(KERN_DEBUG "Calibrating delay loop... "); printk(KERN_INFO "Calibrating delay loop... ");
while ((loops_per_jiffy <<= 1) != 0) { while ((loops_per_jiffy <<= 1) != 0) {
/* wait for "start of" clock tick */ /* wait for "start of" clock tick */
ticks = jiffies; ticks = jiffies;
...@@ -161,12 +169,8 @@ void __cpuinit calibrate_delay(void) ...@@ -161,12 +169,8 @@ void __cpuinit calibrate_delay(void)
if (jiffies != ticks) /* longer than 1 tick */ if (jiffies != ticks) /* longer than 1 tick */
loops_per_jiffy &= ~loopbit; loops_per_jiffy &= ~loopbit;
} }
/* Round the value and print it */
printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100,
loops_per_jiffy);
} }
printk(KERN_INFO "%lu.%02lu BogoMIPS (lpj=%lu)\n",
loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment