Commit 05c6cfb9, authored by Alexey Kardashevskiy, committed by Michael Ellerman

powerpc/iommu/powernv: Release replaced TCE

At the moment, writing a new TCE value to the IOMMU table fails with EBUSY
if there is a valid entry already. However, the PAPR specification allows
the guest to write a new TCE value without clearing the old one first.

Another problem this patch is addressing is the use of pool locks for
external IOMMU users such as VFIO. The pool locks are to protect
DMA page allocator rather than entries and since the host kernel does
not control what pages are in use, there is no point in pool locks and
exchange()+put_page(oldtce) is sufficient to avoid possible races.

This adds an exchange() callback to iommu_table_ops which does the same
thing as set() plus it returns replaced TCE and DMA direction so
the caller can release the pages afterwards. The exchange() receives
a physical address unlike set() which receives linear mapping address;
and returns a physical address as the clear() does.

This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement
for a platform to have exchange() implemented in order to support VFIO.

This replaces iommu_tce_build() and iommu_clear_tce() with
a single iommu_tce_xchg().

This makes sure that TCE permission bits are not set in TCE passed to
IOMMU API as those are to be calculated by platform code from
DMA direction.

This moves SetPageDirty() to the IOMMU code to make it work for both
the VFIO ioctl interface and in-kernel TCE acceleration (when it becomes
available later).
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[aw: for the vfio related changes]
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent c5bb44ed
...@@ -45,13 +45,29 @@ extern int iommu_is_off; ...@@ -45,13 +45,29 @@ extern int iommu_is_off;
extern int iommu_force_on; extern int iommu_force_on;
struct iommu_table_ops { struct iommu_table_ops {
/*
* When called with direction==DMA_NONE, it is equal to clear().
* uaddr is a linear map address.
*/
int (*set)(struct iommu_table *tbl, int (*set)(struct iommu_table *tbl,
long index, long npages, long index, long npages,
unsigned long uaddr, unsigned long uaddr,
enum dma_data_direction direction, enum dma_data_direction direction,
struct dma_attrs *attrs); struct dma_attrs *attrs);
#ifdef CONFIG_IOMMU_API
/*
* Exchanges existing TCE with new TCE plus direction bits;
* returns old TCE and DMA direction mask.
* @hpa is a physical address.
*/
int (*exchange)(struct iommu_table *tbl,
long index,
unsigned long *hpa,
enum dma_data_direction *direction);
#endif
void (*clear)(struct iommu_table *tbl, void (*clear)(struct iommu_table *tbl,
long index, long npages); long index, long npages);
/* get() returns a physical address */
unsigned long (*get)(struct iommu_table *tbl, long index); unsigned long (*get)(struct iommu_table *tbl, long index);
void (*flush)(struct iommu_table *tbl); void (*flush)(struct iommu_table *tbl);
}; };
...@@ -153,6 +169,8 @@ extern void iommu_register_group(struct iommu_table_group *table_group, ...@@ -153,6 +169,8 @@ extern void iommu_register_group(struct iommu_table_group *table_group,
extern int iommu_add_device(struct device *dev); extern int iommu_add_device(struct device *dev);
extern void iommu_del_device(struct device *dev); extern void iommu_del_device(struct device *dev);
extern int __init tce_iommu_bus_notifier_init(void); extern int __init tce_iommu_bus_notifier_init(void);
extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
unsigned long *hpa, enum dma_data_direction *direction);
#else #else
static inline void iommu_register_group(struct iommu_table_group *table_group, static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, int pci_domain_number,
...@@ -225,10 +243,6 @@ extern int iommu_tce_clear_param_check(struct iommu_table *tbl, ...@@ -225,10 +243,6 @@ extern int iommu_tce_clear_param_check(struct iommu_table *tbl,
unsigned long npages); unsigned long npages);
extern int iommu_tce_put_param_check(struct iommu_table *tbl, extern int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce); unsigned long ioba, unsigned long tce);
extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
unsigned long hwaddr, enum dma_data_direction direction);
extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
unsigned long entry);
extern void iommu_flush_tce(struct iommu_table *tbl); extern void iommu_flush_tce(struct iommu_table *tbl);
extern int iommu_take_ownership(struct iommu_table *tbl); extern int iommu_take_ownership(struct iommu_table *tbl);
......
...@@ -965,10 +965,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); ...@@ -965,10 +965,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
int iommu_tce_put_param_check(struct iommu_table *tbl, int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce) unsigned long ioba, unsigned long tce)
{ {
if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) if (tce & ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;
if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
return -EINVAL; return -EINVAL;
if (ioba & ~IOMMU_PAGE_MASK(tbl)) if (ioba & ~IOMMU_PAGE_MASK(tbl))
...@@ -985,44 +982,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl, ...@@ -985,44 +982,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
} }
EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
unsigned long *hpa, enum dma_data_direction *direction)
{ {
unsigned long oldtce; long ret;
struct iommu_pool *pool = get_pool(tbl, entry);
spin_lock(&(pool->lock)); ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
oldtce = tbl->it_ops->get(tbl, entry); if (!ret && ((*direction == DMA_FROM_DEVICE) ||
if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) (*direction == DMA_BIDIRECTIONAL)))
tbl->it_ops->clear(tbl, entry, 1); SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
else
oldtce = 0;
spin_unlock(&(pool->lock));
return oldtce;
}
EXPORT_SYMBOL_GPL(iommu_clear_tce);
/*
* hwaddr is a kernel virtual address here (0xc... bazillion),
* tce_build converts it to a physical address.
*/
int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
unsigned long hwaddr, enum dma_data_direction direction)
{
int ret = -EBUSY;
unsigned long oldtce;
struct iommu_pool *pool = get_pool(tbl, entry);
spin_lock(&(pool->lock));
oldtce = tbl->it_ops->get(tbl, entry);
/* Add new entry if it is not busy */
if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL);
spin_unlock(&(pool->lock));
/* if (unlikely(ret)) /* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
...@@ -1031,13 +1000,23 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, ...@@ -1031,13 +1000,23 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(iommu_tce_build); EXPORT_SYMBOL_GPL(iommu_tce_xchg);
int iommu_take_ownership(struct iommu_table *tbl) int iommu_take_ownership(struct iommu_table *tbl)
{ {
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
int ret = 0; int ret = 0;
/*
* VFIO does not control TCE entries allocation and the guest
* can write new TCEs on top of existing ones so the caller of
* iommu_tce_xchg() must be able to release old pages. This functionality
* requires exchange() callback defined so if it is not
* implemented, we disallow taking ownership over the table.
*/
if (!tbl->it_ops->exchange)
return -EINVAL;
spin_lock_irqsave(&tbl->large_pool.lock, flags); spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++) for (i = 0; i < tbl->nr_pools; i++)
spin_lock(&tbl->pools[i].lock); spin_lock(&tbl->pools[i].lock);
......
...@@ -1738,6 +1738,20 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, ...@@ -1738,6 +1738,20 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
return ret; return ret;
} }
#ifdef CONFIG_IOMMU_API
/*
 * exchange() callback for IODA1 tables.
 *
 * Delegates the actual swap to pnv_tce_xchg(); @hpa and @direction are
 * passed through unchanged, so on return they carry whatever
 * pnv_tce_xchg() stored in them. If the swap succeeded and the table
 * type has TCE_PCI_SWINV_CREATE or TCE_PCI_SWINV_FREE set, the single
 * entry at @index is invalidated as well.
 *
 * Returns the result of pnv_tce_xchg().
 */
static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	long rc = pnv_tce_xchg(tbl, index, hpa, direction);

	/* Nothing was swapped, so there is nothing to invalidate. */
	if (rc)
		return rc;

	if (tbl->it_type & (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))
		pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false);

	return 0;
}
#endif
static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
long npages) long npages)
{ {
...@@ -1749,6 +1763,9 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, ...@@ -1749,6 +1763,9 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
static struct iommu_table_ops pnv_ioda1_iommu_ops = { static struct iommu_table_ops pnv_ioda1_iommu_ops = {
.set = pnv_ioda1_tce_build, .set = pnv_ioda1_tce_build,
#ifdef CONFIG_IOMMU_API
.exchange = pnv_ioda1_tce_xchg,
#endif
.clear = pnv_ioda1_tce_free, .clear = pnv_ioda1_tce_free,
.get = pnv_tce_get, .get = pnv_tce_get,
}; };
...@@ -1824,6 +1841,20 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, ...@@ -1824,6 +1841,20 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
return ret; return ret;
} }
#ifdef CONFIG_IOMMU_API
/*
 * exchange() callback for IODA2 tables.
 *
 * Delegates the actual swap to pnv_tce_xchg(); @hpa and @direction are
 * passed through unchanged, so on return they carry whatever
 * pnv_tce_xchg() stored in them. If the swap succeeded and the table
 * type has TCE_PCI_SWINV_CREATE or TCE_PCI_SWINV_FREE set, the single
 * entry at @index is invalidated as well.
 *
 * Returns the result of pnv_tce_xchg().
 */
static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	long rc = pnv_tce_xchg(tbl, index, hpa, direction);

	/* Nothing was swapped, so there is nothing to invalidate. */
	if (rc)
		return rc;

	if (tbl->it_type & (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))
		pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);

	return 0;
}
#endif
static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
long npages) long npages)
{ {
...@@ -1835,6 +1866,9 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, ...@@ -1835,6 +1866,9 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
static struct iommu_table_ops pnv_ioda2_iommu_ops = { static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.set = pnv_ioda2_tce_build, .set = pnv_ioda2_tce_build,
#ifdef CONFIG_IOMMU_API
.exchange = pnv_ioda2_tce_xchg,
#endif
.clear = pnv_ioda2_tce_free, .clear = pnv_ioda2_tce_free,
.get = pnv_tce_get, .get = pnv_tce_get,
}; };
......
...@@ -85,6 +85,9 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } ...@@ -85,6 +85,9 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { static struct iommu_table_ops pnv_p5ioc2_iommu_ops = {
.set = pnv_tce_build, .set = pnv_tce_build,
#ifdef CONFIG_IOMMU_API
.exchange = pnv_tce_xchg,
#endif
.clear = pnv_tce_free, .clear = pnv_tce_free,
.get = pnv_tce_get, .get = pnv_tce_get,
}; };
......
...@@ -598,6 +598,24 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, ...@@ -598,6 +598,24 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
return 0; return 0;
} }
#ifdef CONFIG_IOMMU_API
/*
 * Swaps the TCE at @index for a new one using xchg() and hands the
 * replaced entry back to the caller.
 *
 * @tbl:       table to update.
 * @index:     entry number; tbl->it_offset is subtracted before the
 *             table is indexed.
 * @hpa:       in:  address for the new entry. It must not have any bits
 *                  set outside IOMMU_PAGE_MASK(tbl), otherwise BUG.
 *             out: the replaced entry with TCE_PCI_READ and
 *                  TCE_PCI_WRITE stripped.
 * @direction: in:  DMA direction of the new entry; it is converted to
 *                  TCE permission bits and OR-ed into the new entry.
 *             out: direction decoded from the replaced entry.
 *
 * Always returns 0.
 */
int pnv_tce_xchg(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
	unsigned long newtce = *hpa | proto_tce, oldtce;
	unsigned long idx = index - tbl->it_offset;

	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));

	/*
	 * The table holds big-endian values (the new entry is written via
	 * cpu_to_be64()). Convert the replaced entry to CPU byte order once,
	 * so that both the address mask and the direction decoding below
	 * see the permission bits in the right place on little-endian hosts.
	 */
	oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)));
	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	*direction = iommu_tce_direction(oldtce);

	return 0;
}
#endif
void pnv_tce_free(struct iommu_table *tbl, long index, long npages) void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
{ {
long i; long i;
......
...@@ -207,6 +207,8 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, ...@@ -207,6 +207,8 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction, unsigned long uaddr, enum dma_data_direction direction,
struct dma_attrs *attrs); struct dma_attrs *attrs);
extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction);
extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
......
...@@ -236,18 +236,11 @@ static void tce_iommu_release(void *iommu_data) ...@@ -236,18 +236,11 @@ static void tce_iommu_release(void *iommu_data)
} }
static void tce_iommu_unuse_page(struct tce_container *container, static void tce_iommu_unuse_page(struct tce_container *container,
unsigned long oldtce) unsigned long hpa)
{ {
struct page *page; struct page *page;
if (!(oldtce & (TCE_PCI_READ | TCE_PCI_WRITE))) page = pfn_to_page(hpa >> PAGE_SHIFT);
return;
page = pfn_to_page(oldtce >> PAGE_SHIFT);
if (oldtce & TCE_PCI_WRITE)
SetPageDirty(page);
put_page(page); put_page(page);
} }
...@@ -255,14 +248,21 @@ static int tce_iommu_clear(struct tce_container *container, ...@@ -255,14 +248,21 @@ static int tce_iommu_clear(struct tce_container *container,
struct iommu_table *tbl, struct iommu_table *tbl,
unsigned long entry, unsigned long pages) unsigned long entry, unsigned long pages)
{ {
unsigned long oldtce; unsigned long oldhpa;
long ret;
enum dma_data_direction direction;
for ( ; pages; --pages, ++entry) { for ( ; pages; --pages, ++entry) {
oldtce = iommu_clear_tce(tbl, entry); direction = DMA_NONE;
if (!oldtce) oldhpa = 0;
ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
if (ret)
continue;
if (direction == DMA_NONE)
continue; continue;
tce_iommu_unuse_page(container, oldtce); tce_iommu_unuse_page(container, oldhpa);
} }
return 0; return 0;
...@@ -284,12 +284,13 @@ static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa) ...@@ -284,12 +284,13 @@ static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
static long tce_iommu_build(struct tce_container *container, static long tce_iommu_build(struct tce_container *container,
struct iommu_table *tbl, struct iommu_table *tbl,
unsigned long entry, unsigned long tce, unsigned long pages) unsigned long entry, unsigned long tce, unsigned long pages,
enum dma_data_direction direction)
{ {
long i, ret = 0; long i, ret = 0;
struct page *page; struct page *page;
unsigned long hpa; unsigned long hpa;
enum dma_data_direction direction = iommu_tce_direction(tce); enum dma_data_direction dirtmp;
for (i = 0; i < pages; ++i) { for (i = 0; i < pages; ++i) {
unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
...@@ -305,8 +306,8 @@ static long tce_iommu_build(struct tce_container *container, ...@@ -305,8 +306,8 @@ static long tce_iommu_build(struct tce_container *container,
} }
hpa |= offset; hpa |= offset;
ret = iommu_tce_build(tbl, entry + i, (unsigned long) __va(hpa), dirtmp = direction;
direction); ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
if (ret) { if (ret) {
tce_iommu_unuse_page(container, hpa); tce_iommu_unuse_page(container, hpa);
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
...@@ -314,6 +315,10 @@ static long tce_iommu_build(struct tce_container *container, ...@@ -314,6 +315,10 @@ static long tce_iommu_build(struct tce_container *container,
tce, ret); tce, ret);
break; break;
} }
if (dirtmp != DMA_NONE)
tce_iommu_unuse_page(container, hpa);
tce += IOMMU_PAGE_SIZE(tbl); tce += IOMMU_PAGE_SIZE(tbl);
} }
...@@ -378,8 +383,8 @@ static long tce_iommu_ioctl(void *iommu_data, ...@@ -378,8 +383,8 @@ static long tce_iommu_ioctl(void *iommu_data,
case VFIO_IOMMU_MAP_DMA: { case VFIO_IOMMU_MAP_DMA: {
struct vfio_iommu_type1_dma_map param; struct vfio_iommu_type1_dma_map param;
struct iommu_table *tbl = NULL; struct iommu_table *tbl = NULL;
unsigned long tce;
long num; long num;
enum dma_data_direction direction;
if (!container->enabled) if (!container->enabled)
return -EPERM; return -EPERM;
...@@ -405,19 +410,27 @@ static long tce_iommu_ioctl(void *iommu_data, ...@@ -405,19 +410,27 @@ static long tce_iommu_ioctl(void *iommu_data,
return -EINVAL; return -EINVAL;
/* iova is checked by the IOMMU API */ /* iova is checked by the IOMMU API */
tce = param.vaddr; if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
if (param.flags & VFIO_DMA_MAP_FLAG_READ) if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
tce |= TCE_PCI_READ; direction = DMA_BIDIRECTIONAL;
if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) else
tce |= TCE_PCI_WRITE; direction = DMA_TO_DEVICE;
} else {
if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
direction = DMA_FROM_DEVICE;
else
return -EINVAL;
}
ret = iommu_tce_put_param_check(tbl, param.iova, tce); ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
if (ret) if (ret)
return ret; return ret;
ret = tce_iommu_build(container, tbl, ret = tce_iommu_build(container, tbl,
param.iova >> tbl->it_page_shift, param.iova >> tbl->it_page_shift,
tce, param.size >> tbl->it_page_shift); param.vaddr,
param.size >> tbl->it_page_shift,
direction);
iommu_flush_tce(tbl); iommu_flush_tce(tbl);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment