Commit 3f6280dd authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (61 commits)
  amd-iommu: remove unnecessary "AMD IOMMU: " prefix
  amd-iommu: detach device explicitly before attaching it to a new domain
  amd-iommu: remove BUS_NOTIFY_BOUND_DRIVER handling
  dma-debug: simplify logic in driver_filter()
  dma-debug: disable/enable irqs only once in device_dma_allocations
  dma-debug: use pr_* instead of printk(KERN_* ...)
  dma-debug: code style fixes
  dma-debug: comment style fixes
  dma-debug: change hash_bucket_find from first-fit to best-fit
  x86: enable GART-IOMMU only after setting up protection methods
  amd_iommu: fix lock imbalance
  dma-debug: add documentation for the driver filter
  dma-debug: add dma_debug_driver kernel command line
  dma-debug: add debugfs file for driver filter
  dma-debug: add variables and checks for driver filter
  dma-debug: fix debug_dma_sync_sg_for_cpu and debug_dma_sync_sg_for_device
  dma-debug: use sg_dma_len accessor
  dma-debug: use sg_dma_address accessor instead of using dma_address directly
  amd-iommu: don't free dma adresses below 512MB with CONFIG_IOMMU_STRESS
  amd-iommu: don't preallocate page tables with CONFIG_IOMMU_STRESS
  ...
parents 75063600 92db1e6a
......@@ -704,12 +704,24 @@ this directory the following files can currently be found:
The current number of free dma_debug_entries
in the allocator.
dma-api/driver-filter
You can write a name of a driver into this file
to limit the debug output to requests from that
particular driver. Write an empty string to
that file to disable the filter and see
all errors again.
If you have this code compiled into your kernel it will be enabled by default.
If you want to boot without the bookkeeping anyway you can provide
'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
Notice that you can not enable it again at runtime. You have to reboot to do
so.
If you want to see debug messages only for a special device driver you can
specify the dma_debug_driver=<drivername> parameter. This will enable the
driver filter at boot time. The debug code will only print errors for that
driver afterwards. This filter can be disabled or changed later using debugfs.
When the code disables itself at runtime this is most likely because it ran
out of dma_debug_entries. These entries are preallocated at boot. The number
of preallocated entries is defined per architecture. If it is too low for you
......
......@@ -329,11 +329,6 @@ and is between 256 and 4096 characters. It is defined in the file
flushed before they will be reused, which
is a lot of faster
amd_iommu_size= [HW,X86-64]
Define the size of the aperture for the AMD IOMMU
driver. Possible values are:
'32M', '64M' (default), '128M', '256M', '512M', '1G'
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
......@@ -646,6 +641,13 @@ and is between 256 and 4096 characters. It is defined in the file
DMA-API debugging code disables itself because the
architectural default is too low.
dma_debug_driver=<driver_name>
With this option the DMA-API debugging driver
filter feature can be enabled at boot time. Just
pass the driver to filter for as the parameter.
The filter can be disabled or changed to another
driver later using sysfs.
dscc4.setup= [NET]
dtc3181e= [HW,SCSI]
......
......@@ -159,10 +159,17 @@ config IOMMU_DEBUG
options. See Documentation/x86_64/boot-options.txt for more
details.
config IOMMU_STRESS
bool "Enable IOMMU stress-test mode"
---help---
This option disables various optimizations in IOMMU related
code to do real stress testing of the IOMMU code. This option
will cause a performance drop and should only be enabled for
testing.
config IOMMU_LEAK
bool "IOMMU leak tracing"
depends on DEBUG_KERNEL
depends on IOMMU_DEBUG
depends on IOMMU_DEBUG && DMA_API_DEBUG
---help---
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
......
......@@ -27,6 +27,8 @@ extern int amd_iommu_init(void);
extern int amd_iommu_init_dma_ops(void);
extern void amd_iommu_detect(void);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_flush_all_domains(void);
extern void amd_iommu_flush_all_devices(void);
#else
static inline int amd_iommu_init(void) { return -ENODEV; }
static inline void amd_iommu_detect(void) { }
......
......@@ -194,6 +194,27 @@
#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
domain for an IOMMU */
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
do { \
if (amd_iommu_dump) \
printk(KERN_INFO "AMD IOMMU: " format, ## arg); \
} while(0);
/*
* Make iterating over all IOMMUs easier
*/
#define for_each_iommu(iommu) \
list_for_each_entry((iommu), &amd_iommu_list, list)
#define for_each_iommu_safe(iommu, next) \
list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
#define APERTURE_RANGE_SHIFT 27 /* 128 MB */
#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT)
#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */
#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
/*
* This structure contains generic data for IOMMU protection domains
......@@ -209,6 +230,26 @@ struct protection_domain {
void *priv; /* private data */
};
/*
* For dynamic growth the aperture size is split into ranges of 128MB of
* DMA address space each. This struct represents one such range.
*/
struct aperture_range {
/* address allocation bitmap */
unsigned long *bitmap;
/*
* Array of PTE pages for the aperture. In this array we save all the
* leaf pages of the domain page table used for the aperture. This way
* we don't need to walk the page table to find a specific PTE. We can
* just calculate its address in constant time.
*/
u64 *pte_pages[64];
unsigned long offset;
};
/*
* Data container for a dma_ops specific protection domain
*/
......@@ -222,18 +263,10 @@ struct dma_ops_domain {
unsigned long aperture_size;
/* address we start to search for free addresses */
unsigned long next_bit;
/* address allocation bitmap */
unsigned long *bitmap;
unsigned long next_address;
/*
* Array of PTE pages for the aperture. In this array we save all the
* leaf pages of the domain page table used for the aperture. This way
* we don't need to walk the page table to find a specific PTE. We can
* just calculate its address in constant time.
*/
u64 **pte_pages;
/* address space relevant data */
struct aperture_range *aperture[APERTURE_MAX_RANGES];
/* This will be set to true when TLB needs to be flushed */
bool need_flush;
......
This diff is collapsed.
This diff is collapsed.
......@@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = {
static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
/* enable this to stress test the chip's TCE cache */
#ifdef CONFIG_IOMMU_DEBUG
static int debugging = 1;
static inline unsigned long verify_bit_range(unsigned long* bitmap,
int expected, unsigned long start, unsigned long end)
{
unsigned long idx = start;
BUG_ON(start >= end);
while (idx < end) {
if (!!test_bit(idx, bitmap) != expected)
return idx;
++idx;
}
/* all bits have the expected value */
return ~0UL;
}
#else /* debugging is disabled */
static int debugging;
static inline unsigned long verify_bit_range(unsigned long* bitmap,
int expected, unsigned long start, unsigned long end)
{
return ~0UL;
}
#endif /* CONFIG_IOMMU_DEBUG */
static inline int translation_enabled(struct iommu_table *tbl)
{
/* only PHBs with translation enabled have an IOMMU table */
......@@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
{
unsigned long index;
unsigned long end;
unsigned long badbit;
unsigned long flags;
index = start_addr >> PAGE_SHIFT;
......@@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
spin_lock_irqsave(&tbl->it_lock, flags);
badbit = verify_bit_range(tbl->it_map, 0, index, end);
if (badbit != ~0UL) {
if (printk_ratelimit())
printk(KERN_ERR "Calgary: entry already allocated at "
"0x%lx tbl %p dma 0x%lx npages %u\n",
badbit, tbl, start_addr, npages);
}
iommu_area_reserve(tbl->it_map, index, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
......@@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages)
{
unsigned long entry;
unsigned long badbit;
unsigned long badend;
unsigned long flags;
......@@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
spin_lock_irqsave(&tbl->it_lock, flags);
badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
if (badbit != ~0UL) {
if (printk_ratelimit())
printk(KERN_ERR "Calgary: bit is off at 0x%lx "
"tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
badbit, tbl, dma_addr, entry, npages);
}
iommu_area_free(tbl->it_map, entry, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
......@@ -1488,9 +1439,8 @@ void __init detect_calgary(void)
iommu_detected = 1;
calgary_detected = 1;
printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
"CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
debugging ? "enabled" : "disabled");
printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
specified_table_size);
/* swiotlb for devices that aren't behind the Calgary. */
if (max_pfn > MAX_DMA32_PFN)
......
......@@ -144,48 +144,21 @@ static void flush_gart(void)
}
#ifdef CONFIG_IOMMU_LEAK
#define SET_LEAK(x) \
do { \
if (iommu_leak_tab) \
iommu_leak_tab[x] = __builtin_return_address(0);\
} while (0)
#define CLEAR_LEAK(x) \
do { \
if (iommu_leak_tab) \
iommu_leak_tab[x] = NULL; \
} while (0)
/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
static int iommu_leak_pages = 20;
static void dump_leak(void)
{
int i;
static int dump;
if (dump || !iommu_leak_tab)
if (dump)
return;
dump = 1;
show_stack(NULL, NULL);
/* Very crude. dump some from the end of the table too */
printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
iommu_leak_pages);
for (i = 0; i < iommu_leak_pages; i += 2) {
printk(KERN_DEBUG "%lu: ", iommu_pages-i);
printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
0);
printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
}
printk(KERN_DEBUG "\n");
show_stack(NULL, NULL);
debug_dma_dump_mappings(NULL);
}
#else
# define SET_LEAK(x)
# define CLEAR_LEAK(x)
#endif
static void iommu_full(struct device *dev, size_t size, int dir)
......@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
SET_LEAK(iommu_page + i);
phys_mem += PAGE_SIZE;
}
return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
......@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
CLEAR_LEAK(iommu_page + i);
}
free_iommu(iommu_page, npages);
}
......@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
while (pages--) {
iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
SET_LEAK(iommu_page);
addr += PAGE_SIZE;
iommu_page++;
}
......@@ -688,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
agp_gatt_table = gatt;
enable_gart_translations();
error = sysdev_class_register(&gart_sysdev_class);
if (!error)
error = sysdev_register(&device_gart);
......@@ -801,11 +769,12 @@ void __init gart_iommu_init(void)
#ifdef CONFIG_IOMMU_LEAK
if (leak_trace) {
iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
get_order(iommu_pages*sizeof(void *)));
if (!iommu_leak_tab)
int ret;
ret = dma_debug_resize_entries(iommu_pages);
if (ret)
printk(KERN_DEBUG
"PCI-DMA: Cannot allocate leak trace area\n");
"PCI-DMA: Cannot trace all the entries\n");
}
#endif
......@@ -845,6 +814,14 @@ void __init gart_iommu_init(void)
* the pages as Not-Present:
*/
wbinvd();
/*
* Now all caches are flushed and we can safely enable
* GART hardware. Doing it early leaves the possibility
* of stale cache entries that can lead to GART PTE
* errors.
*/
enable_gart_translations();
/*
* Try to workaround a bug (thanks to BenH):
......
......@@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
return paddr;
}
phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
{
return baddr;
}
......
......@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);
extern void dma_debug_init(u32 num_entries);
extern int dma_debug_resize_entries(u32 num_entries);
extern void debug_dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
int direction, dma_addr_t dma_addr,
......@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
{
}
static inline int dma_debug_resize_entries(u32 num_entries)
{
return 0;
}
static inline void debug_dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
int direction, dma_addr_t dma_addr,
......
......@@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev,
phys_addr_t address);
extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev,
dma_addr_t address);
extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size);
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment