Commit b159a68b authored by Olof Johansson, committed by Linus Torvalds

[PATCH] ppc64: More IOMMU cleanups

* Tidy up some of the arguments to iommu_*()
* Comment cleanup
* Don't bump the hint to the next block for large allocs, to avoid
  fragmentation (see the sketch after this header).
* Simplify vmerge logic during SG allocations
* Move the memory barriers from the bus-specific parts into the common
  code.

Some changes are mine, some are from Ben Herrenschmidt.
parent 8ccd6848
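
The allocation policy the first three bullets describe is easiest to see outside kernel context. Below is a minimal userspace sketch of the same idea — not the kernel code: the table size, block size, and demo values are assumptions for illustration, and a linear scan stands in for find_next_zero_bit(). Small allocations search only the lower 3/4 of the bitmap and round their hint up to the next block; large allocations search the upper quarter and leave their hint exactly at the end of the range (the anti-fragmentation change); a failed search is retried with a widened window up to two times before giving up, mirroring the pass logic in iommu_range_alloc().

#include <stdbool.h>
#include <stdio.h>

#define MAPSIZE   256                  /* total bitmap entries (assumption) */
#define HALFPOINT (MAPSIZE * 3 / 4)    /* upper 1/4 reserved for large allocs */
#define BLOCKSIZE 16UL                 /* hint granularity for small allocs */
#define NO_ENTRY  (~0UL)

static bool map[MAPSIZE];              /* the allocation bitmap */
static unsigned long hint, largehint = HALFPOINT;

static unsigned long range_alloc(unsigned long npages)
{
        int largealloc = npages > 15;  /* same threshold as the patch */
        int pass = 0;
        unsigned long start = largealloc ? largehint : hint;
        unsigned long limit = largealloc ? MAPSIZE : HALFPOINT;
        unsigned long n, end, i;

        if (largealloc && start < HALFPOINT)
                start = HALFPOINT;
        if (start >= limit)            /* stale hint: restart the region */
                start = largealloc ? HALFPOINT : 0;

again:
        for (n = start; n < limit && map[n]; n++)
                ;                      /* find_next_zero_bit() stand-in */
        end = n + npages;
        if (end >= limit) {
                if (pass++ < 2) {
                        /* failed: retry, widening the search window
                         * across passes, as the patch does */
                        start = (largealloc ^ pass) ? HALFPOINT : 0;
                        limit = pass ? MAPSIZE : limit;
                        goto again;
                }
                return NO_ENTRY;       /* third failure, give up */
        }
        for (i = n; i < end; i++)
                if (map[i]) {          /* free run too short, retry past it */
                        start = i + 1;
                        goto again;
                }
        for (i = n; i < end; i++)
                map[i] = true;

        if (largealloc)
                largehint = end;       /* no round-up: avoids fragmenting
                                        * the large-alloc region */
        else
                hint = (end + BLOCKSIZE - 1) & ~(BLOCKSIZE - 1);
        return n;
}

int main(void)
{
        printf("small at %lu\n", range_alloc(4));  /* 0   - lower region   */
        printf("small at %lu\n", range_alloc(4));  /* 16  - next block     */
        printf("large at %lu\n", range_alloc(16)); /* 192 - upper region   */
        printf("large at %lu\n", range_alloc(16)); /* 208 - packed tightly */
        return 0;
}

Compiled as plain C, this prints the two small allocations at entries 0 and 16 (hint bumped a full block) and the two large ones packed back-to-back at 192 and 208 (hint left unrounded).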
--- a/arch/ppc64/kernel/iommu.c
+++ b/arch/ppc64/kernel/iommu.c
 /*
- * arch/ppc64/kernel/pci_iommu.c
+ * arch/ppc64/kernel/iommu.c
  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
  *
  * Rewrite, cleanup, new allocation schemes, virtual merging:
  * Copyright (C) 2004 Olof Johansson, IBM Corporation
  *               and  Ben. Herrenschmidt, IBM Corporation
  *
- * Dynamic DMA mapping support, platform-independent parts.
+ * Dynamic DMA mapping support, bus-independent parts.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -60,41 +60,57 @@ static int __init setup_iommu(char *str)
 __setup("iommu=", setup_iommu);

 static unsigned long iommu_range_alloc(struct iommu_table *tbl, unsigned long npages,
                                        unsigned long *handle)
 {
         unsigned long n, end, i, start;
-        unsigned long hint;
         unsigned long limit;
         int largealloc = npages > 15;
+        int pass = 0;

-        if (handle && *handle)
-                hint = *handle;
-        else
-                hint = largealloc ? tbl->it_largehint : tbl->it_hint;
-
-        /* Most of this is stolen from x86_64's bit string search function */
+        /* This allocator was derived from x86_64's bit string search */

         /* Sanity check */
         if (unlikely(npages) == 0) {
                 if (printk_ratelimit())
                         WARN_ON(1);
                 return NO_TCE;
         }

-        start = hint;
+        if (handle && *handle)
+                start = *handle;
+        else
+                start = largealloc ? tbl->it_largehint : tbl->it_hint;

-        /* Use only half of the table for small allocs (less than 15 pages). */
-        limit = largealloc ? tbl->it_mapsize : tbl->it_mapsize >> 1;
+        /* Use only half of the table for small allocs (15 pages or less) */
+        limit = largealloc ? tbl->it_mapsize : tbl->it_halfpoint;

-        if (largealloc && start < (tbl->it_mapsize >> 1))
-                start = tbl->it_mapsize >> 1;
+        if (largealloc && start < tbl->it_halfpoint)
+                start = tbl->it_halfpoint;
+
+        /* The case below can happen if we have a small segment appended
+         * to a large, or when the previous alloc was at the very end of
+         * the available space. If so, go back to the initial start.
+         */
+        if (start >= limit)
+                start = largealloc ? tbl->it_largehint : tbl->it_hint;

  again:
         n = find_next_zero_bit(tbl->it_map, limit, start);
         end = n + npages;
-        if (end >= limit) {
-                if (hint) {
-                        start = largealloc ? tbl->it_mapsize >> 1 : 0;
-                        hint = 0;
+
+        if (unlikely(end >= limit)) {
+                if (likely(pass++ < 2)) {
+                        /* First failure, just rescan the half of the table.
+                         * Second failure, rescan the other half of the table.
+                         */
+                        start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
+                        limit = pass ? tbl->it_mapsize : limit;
                         goto again;
-                } else
+                } else {
+                        /* Third failure, give up */
                         return NO_TCE;
+                }
         }

         for (i = n; i < end; i++)
@@ -106,16 +122,17 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, unsigned long npages,
         for (i = n; i < end; i++)
                 __set_bit(i, tbl->it_map);

-        /* Bump the hint to a new PHB cache line, which
-         * is 16 entries wide on all pSeries machines.
-         */
-        if (largealloc)
-                tbl->it_largehint = (end+tbl->it_blocksize-1) &
-                                    ~(tbl->it_blocksize-1);
-        else
-                tbl->it_hint = (end+tbl->it_blocksize-1) &
-                               ~(tbl->it_blocksize-1);
+        /* Bump the hint to a new block for small allocs. */
+        if (largealloc) {
+                /* Don't bump to new block to avoid fragmentation */
+                tbl->it_largehint = end;
+        } else {
+                /* Overflow will be taken care of at the next allocation */
+                tbl->it_hint = (end + tbl->it_blocksize - 1) &
+                                ~(tbl->it_blocksize - 1);
+        }

         /* Update handle for SG allocations */
         if (handle)
                 *handle = end;
@@ -123,35 +140,38 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, unsigned long npages,
 }

 dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
-                       unsigned int npages, int direction,
-                       unsigned long *handle)
+                       unsigned int npages, int direction)
 {
         unsigned long entry, flags;
-        dma_addr_t retTce = NO_TCE;
+        dma_addr_t ret = NO_TCE;

         spin_lock_irqsave(&(tbl->it_lock), flags);

         /* Allocate a range of entries into the table */
-        entry = iommu_range_alloc(tbl, npages, handle);
+        entry = iommu_range_alloc(tbl, npages, NULL);

         if (unlikely(entry == NO_TCE)) {
                 spin_unlock_irqrestore(&(tbl->it_lock), flags);
                 return NO_TCE;
         }

         /* We got the tces we wanted */
         entry += tbl->it_offset;        /* Offset into real TCE table */
-        retTce = entry << PAGE_SHIFT;   /* Set the return dma address */
+        ret = entry << PAGE_SHIFT;      /* Set the return dma address */

         /* Put the TCEs in the HW table */
-        ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK, direction);
+        ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
+                         direction);

-        /* Flush/invalidate TLBs if necessary */
+        /* Flush/invalidate TLB caches if necessary */
         if (ppc_md.tce_flush)
                 ppc_md.tce_flush(tbl);

         spin_unlock_irqrestore(&(tbl->it_lock), flags);

-        return retTce;
+        /* Make sure updates are seen by hardware */
+        mb();
+
+        return ret;
 }

 static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -168,7 +188,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
                 if (printk_ratelimit()) {
                         printk(KERN_INFO "iommu_free: invalid entry\n");
                         printk(KERN_INFO "\tentry = 0x%lx\n", entry);
-                        printk(KERN_INFO "\tdma_ddr = 0x%lx\n", (u64)dma_addr);
+                        printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
                         printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
                         printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno);
                         printk(KERN_INFO "\tmapsize = 0x%lx\n", (u64)tbl->it_mapsize);
@@ -194,68 +214,30 @@ void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,

         __iommu_free(tbl, dma_addr, npages);

-        /* Flush/invalidate TLBs if necessary */
+        /* Make sure TLB cache is flushed if the HW needs it. We do
+         * not do an mb() here on purpose, it is not needed on any of
+         * the current platforms.
+         */
         if (ppc_md.tce_flush)
                 ppc_md.tce_flush(tbl);

         spin_unlock_irqrestore(&(tbl->it_lock), flags);
 }

-/*
- * Build a iommu_table structure. This contains a bit map which
- * is used to manage allocation of the tce space.
- */
-struct iommu_table *iommu_init_table(struct iommu_table *tbl)
-{
-        unsigned long sz;
-        static int welcomed = 0;
-
-        /* it_size is in pages, it_mapsize in number of entries */
-        tbl->it_mapsize = tbl->it_size * tbl->it_entrysize;
-
-        if (systemcfg->platform == PLATFORM_POWERMAC)
-                tbl->it_mapsize = tbl->it_size * (PAGE_SIZE / sizeof(unsigned int));
-        else
-                tbl->it_mapsize = tbl->it_size * (PAGE_SIZE / sizeof(union tce_entry));
-
-        /* sz is the number of bytes needed for the bitmap */
-        sz = (tbl->it_mapsize + 7) >> 3;
-
-        tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
-        if (!tbl->it_map)
-                panic("iommu_init_table: Can't allocate memory, size %ld bytes\n", sz);
-
-        memset(tbl->it_map, 0, sz);
-
-        tbl->it_hint = 0;
-        tbl->it_largehint = 0;
-        spin_lock_init(&tbl->it_lock);
-
-        if (!welcomed) {
-                printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
-                       novmerge ? "disabled" : "enabled");
-                welcomed = 1;
-        }
-
-        return tbl;
-}
-
-int iommu_alloc_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems,
-                   int direction, unsigned long *handle)
+int iommu_alloc_sg(struct iommu_table *tbl, struct device *dev,
+                   struct scatterlist *sglist, int nelems, int direction)
 {
         dma_addr_t dma_next, dma_addr;
-        unsigned long flags, vaddr, npages, entry;
-        struct scatterlist *s, *outs, *segstart, *ps;
+        unsigned long flags;
+        struct scatterlist *s, *outs, *segstart;
         int outcount;
+        unsigned long handle;

         /* Initialize some stuffs */
         outs = s = segstart = &sglist[0];
         outcount = 1;
-        ps = NULL;
+        handle = 0;

-        /* Init first segment length for error handling */
+        /* Init first segment length for backout at failure */
         outs->dma_length = 0;

         DBG("mapping %d elements:\n", nelems);
@@ -263,13 +245,21 @@ int iommu_alloc_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems,

         spin_lock_irqsave(&(tbl->it_lock), flags);

         for (s = outs; nelems; nelems--, s++) {
+                unsigned long vaddr, npages, entry, slen;
+
+                slen = s->length;
+                /* Sanity check */
+                if (slen == 0) {
+                        dma_next = 0;
+                        continue;
+                }
                 /* Allocate iommu entries for that segment */
                 vaddr = (unsigned long)page_address(s->page) + s->offset;
-                npages = PAGE_ALIGN(vaddr + s->length) - (vaddr & PAGE_MASK);
+                npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
                 npages >>= PAGE_SHIFT;
-                entry = iommu_range_alloc(tbl, npages, handle);
+                entry = iommu_range_alloc(tbl, npages, &handle);

-                DBG(" - vaddr: %lx, size: %lx\n", vaddr, s->length);
+                DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);

                 /* Handle failure */
                 if (unlikely(entry == NO_TCE)) {
@@ -293,13 +283,10 @@ int iommu_alloc_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems,
                 /* If we are in an open segment, try merging */
                 if (segstart != s) {
                         DBG(" - trying merge...\n");
-                        /* We cannot merge is:
+                        /* We cannot merge if:
                          * - allocated dma_addr isn't contiguous to previous allocation
-                         * - current entry has an offset into the page
-                         * - previous entry didn't end on a page boundary
                          */
-                        if (novmerge || (dma_addr != dma_next) || s->offset ||
-                            (ps->offset + ps->length) % PAGE_SIZE) {
+                        if (novmerge || (dma_addr != dma_next)) {
                                 /* Can't merge: create a new segment */
                                 segstart = s;
                                 outcount++; outs++;
@@ -310,31 +297,28 @@ int iommu_alloc_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems,
                         }
                 }

-                /* If we are beginning a new segment, fill entries */
                 if (segstart == s) {
+                        /* This is a new segment, fill entries */
                         DBG(" - filling new segment.\n");
                         outs->dma_address = dma_addr;
-                        outs->dma_length = s->length;
+                        outs->dma_length = slen;
                 }

                 /* Calculate next page pointer for contiguous check */
-                dma_next = (dma_addr & PAGE_MASK) + (npages << PAGE_SHIFT);
+                dma_next = dma_addr + slen;

                 DBG(" - dma next is: %lx\n", dma_next);
-
-                /* Keep a pointer to the previous entry */
-                ps = s;
         }

-        /* Make sure the update is visible to hardware. */
-        mb();
-
-        /* Flush/invalidate TLBs if necessary */
+        /* Flush/invalidate TLB caches if necessary */
         if (ppc_md.tce_flush)
                 ppc_md.tce_flush(tbl);

         spin_unlock_irqrestore(&(tbl->it_lock), flags);

+        /* Make sure updates are seen by hardware */
+        mb();
+
         DBG("mapped %d elements:\n", outcount);

         /* For the sake of iommu_free_sg, we clear out the length in the
@@ -348,25 +332,26 @@ int iommu_alloc_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems,

         return outcount;

  failure:
-        spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
         for (s = &sglist[0]; s <= outs; s++) {
                 if (s->dma_length != 0) {
                         unsigned long vaddr, npages;

                         vaddr = s->dma_address & PAGE_MASK;
                         npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
                                 >> PAGE_SHIFT;
-                        iommu_free(tbl, vaddr, npages);
+                        __iommu_free(tbl, vaddr, npages);
                 }
         }
+        spin_unlock_irqrestore(&(tbl->it_lock), flags);
         return 0;
 }

-void iommu_free_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems,
-                   int direction)
+void iommu_free_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+                   int nelems)
 {
         unsigned long flags;

         /* Lock the whole operation to try to free as a "chunk" */
         spin_lock_irqsave(&(tbl->it_lock), flags);

         while (nelems--) {
@@ -381,9 +366,49 @@ void iommu_free_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems,
                 sglist++;
         }

-        /* Flush/invalidate TLBs if necessary */
+        /* Flush/invalidate TLBs if necessary. As for iommu_free(), we
+         * do not do an mb() here, the affected platforms do not need it
+         * when freeing.
+         */
         if (ppc_md.tce_flush)
                 ppc_md.tce_flush(tbl);

         spin_unlock_irqrestore(&(tbl->it_lock), flags);
 }
+
+/*
+ * Build a iommu_table structure. This contains a bit map which
+ * is used to manage allocation of the tce space.
+ */
+struct iommu_table *iommu_init_table(struct iommu_table *tbl)
+{
+        unsigned long sz;
+        static int welcomed = 0;
+
+        /* it_size is in pages, it_mapsize in number of entries */
+        tbl->it_mapsize = (tbl->it_size << PAGE_SHIFT) / tbl->it_entrysize;
+
+        /* Set aside 1/4 of the table for large allocations. */
+        tbl->it_halfpoint = tbl->it_mapsize * 3 / 4;
+
+        /* number of bytes needed for the bitmap */
+        sz = (tbl->it_mapsize + 7) >> 3;
+
+        tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
+        if (!tbl->it_map)
+                panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
+
+        memset(tbl->it_map, 0, sz);
+
+        tbl->it_hint = 0;
+        tbl->it_largehint = tbl->it_halfpoint;
+        spin_lock_init(&tbl->it_lock);
+
+        if (!welcomed) {
+                printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
+                       novmerge ? "disabled" : "enabled");
+                welcomed = 1;
+        }
+
+        return tbl;
+}
--- a/arch/ppc64/kernel/pci_iommu.c
+++ b/arch/ppc64/kernel/pci_iommu.c
@@ -99,10 +99,7 @@ void *pci_iommu_alloc_consistent(struct pci_dev *hwdev, size_t size,
         memset(ret, 0, size);

         /* Set up tces to cover the allocated range */
-        mapping = iommu_alloc(tbl, ret, npages, PCI_DMA_BIDIRECTIONAL, NULL);
-
-        /* Make sure the update is visible to hardware. */
-        mb();
+        mapping = iommu_alloc(tbl, ret, npages, PCI_DMA_BIDIRECTIONAL);

         if (mapping == NO_TCE) {
                 free_pages((unsigned long)ret, order);
@@ -145,7 +142,6 @@ dma_addr_t pci_iommu_map_single(struct pci_dev *hwdev, void *vaddr,
         dma_addr_t dma_handle = NO_TCE;
         unsigned long uaddr;
         unsigned int npages;
-        unsigned long handle = 0;

         BUG_ON(direction == PCI_DMA_NONE);
@@ -156,7 +152,7 @@ dma_addr_t pci_iommu_map_single(struct pci_dev *hwdev, void *vaddr,
         tbl = devnode_table(hwdev);

         if (tbl) {
-                dma_handle = iommu_alloc(tbl, vaddr, npages, direction, &handle);
+                dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
                 if (dma_handle == NO_TCE) {
                         if (printk_ratelimit()) {
                                 printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %p npages %d\n",
@@ -166,8 +162,6 @@ dma_addr_t pci_iommu_map_single(struct pci_dev *hwdev, void *vaddr,
                 dma_handle |= (uaddr & ~PAGE_MASK);
         }

-        mb();
-
         return dma_handle;
 }
@@ -194,7 +188,6 @@ int pci_iommu_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
                      int direction)
 {
         struct iommu_table * tbl;
-        unsigned long handle;

         BUG_ON(direction == PCI_DMA_NONE);
@@ -205,9 +198,7 @@ int pci_iommu_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
         if (!tbl)
                 return 0;

-        handle = 0;
-
-        return iommu_alloc_sg(tbl, sglist, nelems, direction, &handle);
+        return iommu_alloc_sg(tbl, &pdev->dev, sglist, nelems, direction);
 }

 void pci_iommu_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
@@ -221,7 +212,7 @@ void pci_iommu_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
         if (!tbl)
                 return;

-        iommu_free_sg(tbl, sglist, nelems, direction);
+        iommu_free_sg(tbl, sglist, nelems);
 }

 /* We support DMA to/from any memory page via the iommu */
--- a/arch/ppc64/kernel/vio.c
+++ b/arch/ppc64/kernel/vio.c
@@ -432,7 +432,7 @@ dma_addr_t vio_map_single(struct vio_dev *dev, void *vaddr,
         tbl = dev->iommu_table;

         if (tbl) {
-                dma_handle = iommu_alloc(tbl, vaddr, npages, direction, NULL);
+                dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
                 dma_handle |= (uaddr & ~PAGE_MASK);
         }
@@ -461,7 +461,6 @@ int vio_map_sg(struct vio_dev *vdev, struct scatterlist *sglist, int nelems,
                int direction)
 {
         struct iommu_table *tbl;
-        unsigned long handle;

         BUG_ON(direction == PCI_DMA_NONE);
@@ -472,7 +471,7 @@ int vio_map_sg(struct vio_dev *vdev, struct scatterlist *sglist, int nelems,
         if (!tbl)
                 return 0;

-        return iommu_alloc_sg(tbl, sglist, nelems, direction, &handle);
+        return iommu_alloc_sg(tbl, &vdev->dev, sglist, nelems, direction);
 }
 EXPORT_SYMBOL(vio_map_sg);
@@ -485,7 +484,7 @@ void vio_unmap_sg(struct vio_dev *vdev, struct scatterlist *sglist, int nelems,
         tbl = vdev->iommu_table;

         if (tbl)
-                iommu_free_sg(tbl, sglist, nelems, direction);
+                iommu_free_sg(tbl, sglist, nelems);
 }
 EXPORT_SYMBOL(vio_unmap_sg);
@@ -517,7 +516,7 @@ void *vio_alloc_consistent(struct vio_dev *dev, size_t size,
         /* Page allocation succeeded */
         memset(ret, 0, npages << PAGE_SHIFT);

         /* Set up tces to cover the allocated range */
-        tce = iommu_alloc(tbl, ret, npages, PCI_DMA_BIDIRECTIONAL, NULL);
+        tce = iommu_alloc(tbl, ret, npages, PCI_DMA_BIDIRECTIONAL);
         if (tce == NO_TCE) {
                 PPCDBG(PPCDBG_TCE, "vio_alloc_consistent: iommu_alloc failed\n");
                 free_pages((unsigned long)ret, order);
--- a/include/asm-ppc64/iommu.h
+++ b/include/asm-ppc64/iommu.h
@@ -24,6 +24,7 @@
 #include <asm/types.h>
 #include <linux/spinlock.h>
+#include <linux/device.h>

 /*
  * IOMAP_MAX_ORDER defines the largest contiguous block
@@ -78,6 +79,7 @@ struct iommu_table {
         unsigned long  it_blocksize;   /* Entries in each block (cacheline) */
         unsigned long  it_hint;        /* Hint for next alloc */
         unsigned long  it_largehint;   /* Hint for large allocs */
+        unsigned long  it_halfpoint;   /* Breaking point for small/large allocs */
         spinlock_t     it_lock;        /* Protects it_map */
         unsigned long  it_mapsize;     /* Size of map in # of entries (bits) */
         unsigned long *it_map;         /* A simple allocation bitmap for now */
@@ -132,16 +134,16 @@ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl);

 /* allocates a range of tces and sets them to the pages */
 extern dma_addr_t iommu_alloc(struct iommu_table *, void *page,
-                              unsigned int numPages, int direction,
-                              unsigned long *handle);
+                              unsigned int numPages, int direction);
 extern void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
                        unsigned int npages);

 /* same with sg lists */
-extern int iommu_alloc_sg(struct iommu_table *table, struct scatterlist *sglist,
-                          int nelems, int direction, unsigned long *handle);
+extern int iommu_alloc_sg(struct iommu_table *table, struct device *dev,
+                          struct scatterlist *sglist, int nelems,
+                          int direction);
 extern void iommu_free_sg(struct iommu_table *tbl, struct scatterlist *sglist,
-                          int nelems, int direction);
+                          int nelems);

 extern void tce_init_pSeries(void);
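
A closing note on the vmerge simplification: once dma_next is kept as the previous chunk's dma_addr + slen, a freshly allocated chunk is mergeable exactly when its dma_addr equals dma_next. The old extra checks (current entry has an offset into the page, previous entry didn't end on a page boundary) are subsumed, because a fresh allocation starts on a new IOMMU page: the equality can only hold when the previous chunk ended exactly on a page boundary and the new one starts at offset zero. A small userspace sketch of that merge loop — the toy page allocator and the plain array standing in for a scatterlist are assumptions for illustration, not the kernel API:

#include <stdio.h>

#define PAGE_SIZE 4096UL

struct seg { unsigned long dma_address, dma_length; };

/* Toy allocator: hands out consecutive DMA pages, like a fresh,
 * empty IOMMU table would.  (Illustration only.) */
static unsigned long next_free_page;

static unsigned long dma_alloc(unsigned long npages)
{
        unsigned long addr = next_free_page * PAGE_SIZE;

        next_free_page += npages;
        return addr;
}

/* Merge consecutive chunks whose DMA ranges come out contiguous
 * (novmerge handling omitted). */
static int map_sg(const unsigned long *lengths, int nelems, struct seg *out)
{
        unsigned long dma_next = 0;
        int outcount = 0;

        for (int i = 0; i < nelems; i++) {
                unsigned long slen = lengths[i];
                unsigned long npages = (slen + PAGE_SIZE - 1) / PAGE_SIZE;
                unsigned long dma_addr = dma_alloc(npages);

                if (outcount && dma_addr == dma_next) {
                        /* Contiguous with the previous segment: merge */
                        out[outcount - 1].dma_length += slen;
                } else {
                        /* Can't merge: start a new segment */
                        out[outcount].dma_address = dma_addr;
                        out[outcount].dma_length = slen;
                        outcount++;
                }
                dma_next = dma_addr + slen;   /* the one check that matters */
        }
        return outcount;
}

int main(void)
{
        unsigned long lengths[] = { PAGE_SIZE, PAGE_SIZE, 2 * PAGE_SIZE };
        struct seg out[3];
        int n = map_sg(lengths, 3, out);

        for (int i = 0; i < n; i++)
                printf("seg %d: dma 0x%lx len 0x%lx\n", i,
                       out[i].dma_address, out[i].dma_length);
        return 0;   /* prints a single merged segment of four pages */
}

With the page-multiple lengths in main(), the toy allocator returns contiguous DMA ranges and the loop collapses all three chunks into one 4-page segment; make any length a non-multiple of PAGE_SIZE and the equality test fails at the following chunk, splitting the list exactly where the removed checks used to.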