Commit 2820a708 authored by Eugeniy Paltsev, committed by Vineet Gupta

ARC: dma [IOC] Enable per device io coherency

So far the IOC treatment was global on ARC: it was turned on (or off)
for all devices in the system. With this patch it can be done per
device using the "dma-coherent" DT property; IOW HW-coherent and
regular DMA peripherals can now be used simultaneously.

The changes involved are many, so a summary is listed below:

1. common code calls ARC arch_setup_dma_ops() per device (see the
   sketch after this list).

2. For coherent dma (IOC) it plugs in the generic @dma_direct_ops which
   doesn't need any arch-specific backend: no explicit cache flushes or
   MMU mappings are needed to provide uncached access.

   - dma_(map|sync)_single* return early as the corresponding dma ops
     callbacks are NULL in generic code.
     So arch_sync_dma_*() -> dma_cache_*() need not handle the coherent
     dma case, hence drop the ARC __dma_cache_*_ioc() routines, which
     were no-ops anyway.

3. For noncoherent dma (non IOC) the generic @dma_noncoherent_ops is
   used, which in turn calls ARC-specific routines.

   - arch_dma_alloc() no longer checks @ioc_enable since it is now
     called only for the !IOC case.
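
To make the per-device flow concrete, here is a minimal sketch (illustrative
only, not part of the patch): of_dma_is_coherent(), get_dma_ops() and the
generic sync wrapper are taken from the common OF/DMA code of this era, and
dev/dma_base/size/addr/dir are placeholders.

	/* Generic setup code: the per-device "dma-coherent" DT property
	 * determines @coherent as passed to the new arch hook */
	bool coherent = of_dma_is_coherent(dev->of_node);

	arch_setup_dma_ops(dev, dma_base, size, NULL, coherent);
	/* -> set_dma_ops(dev, &dma_direct_ops)      if IOC && coherent
	 * -> set_dma_ops(dev, &dma_noncoherent_ops) otherwise */

	/* Generic dma_sync_single_for_cpu() (simplified): the callback is
	 * skipped when NULL, so IOC devices never reach ARC cache code */
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (ops->sync_single_for_cpu)
		ops->sync_single_for_cpu(dev, addr, size, dir);
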
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
[vgupta: rewrote changelog]
parent 678c8110

arch/arc/include/asm/dma-mapping.h (new file):

// SPDX-License-Identifier: GPL-2.0
// (C) 2018 Synopsys, Inc. (www.synopsys.com)

#ifndef ASM_ARC_DMA_MAPPING_H
#define ASM_ARC_DMA_MAPPING_H

#include <asm-generic/dma-mapping.h>

void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent);
#define arch_setup_dma_ops arch_setup_dma_ops

#endif
arch/arc/mm/cache.c:

@@ -65,7 +65,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 
 	n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
 		       perip_base,
-		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency "));
+		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
 
 	return buf;
 }
@@ -896,15 +896,6 @@ static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz)
 	slc_op(start, sz, OP_FLUSH);
 }
 
-/*
- * DMA ops for systems with IOC
- * IOC hardware snoops all DMA traffic keeping the caches consistent with
- * memory - eliding need for any explicit cache maintenance of DMA buffers
- */
-static void __dma_cache_wback_inv_ioc(phys_addr_t start, unsigned long sz) {}
-static void __dma_cache_inv_ioc(phys_addr_t start, unsigned long sz) {}
-static void __dma_cache_wback_ioc(phys_addr_t start, unsigned long sz) {}
-
 /*
  * Exported DMA API
  */
@@ -1264,11 +1255,7 @@ void __init arc_cache_init_master(void)
 	if (is_isa_arcv2() && ioc_enable)
 		arc_ioc_setup();
 
-	if (is_isa_arcv2() && ioc_enable) {
-		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
-		__dma_cache_inv = __dma_cache_inv_ioc;
-		__dma_cache_wback = __dma_cache_wback_ioc;
-	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
+	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
 		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
 		__dma_cache_inv = __dma_cache_inv_slc;
 		__dma_cache_wback = __dma_cache_wback_slc;
@@ -1277,6 +1264,12 @@ void __init arc_cache_init_master(void)
 		__dma_cache_inv = __dma_cache_inv_l1;
 		__dma_cache_wback = __dma_cache_wback_l1;
 	}
+	/*
+	 * In case of IOC (say IOC+SLC case), pointers above could still be set
+	 * but end up not being relevant as the first function in chain is not
+	 * called at all for @dma_direct_ops
+	 *     arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*()
+	 */
 }
 
 void __ref arc_cache_init(void)
arch/arc/mm/dma.c:

@@ -6,20 +6,17 @@
  * published by the Free Software Foundation.
  */
 
-/*
- * DMA Coherent API Notes
- *
- * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is
- * implemented by accessing it using a kernel virtual address, with
- * Cache bit off in the TLB entry.
- *
- * The default DMA address == Phy address which is 0x8000_0000 based.
- */
-
 #include <linux/dma-noncoherent.h>
 #include <asm/cache.h>
 #include <asm/cacheflush.h>
 
+/*
+ * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c)
+ *  - hardware IOC not available (or "dma-coherent" not set for device in DT)
+ *  - But still handle both coherent and non-coherent requests from caller
+ *
+ * For DMA coherent hardware (IOC) generic code suffices
+ */
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs)
 {
@@ -33,19 +30,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	if (!page)
 		return NULL;
 
-	/*
-	 * IOC relies on all data (even coherent DMA data) being in cache
-	 * Thus allocate normal cached memory
-	 *
-	 * The gains with IOC are two pronged:
-	 *   -For streaming data, elides need for cache maintenance, saving
-	 *    cycles in flush code, and bus bandwidth as all the lines of a
-	 *    buffer need to be flushed out to memory
-	 *   -For coherent data, Read/Write to buffers terminate early in cache
-	 *    (vs. always going to memory - thus are faster)
-	 */
-	if ((is_isa_arcv2() && ioc_enable) ||
-	    (attrs & DMA_ATTR_NON_CONSISTENT))
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
 		need_coh = 0;
 
 	/*
@@ -95,8 +80,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	struct page *page = virt_to_page(paddr);
 	int is_non_coh = 1;
 
-	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
-			(is_isa_arcv2() && ioc_enable);
+	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT);
 
 	if (PageHighMem(page) || !is_non_coh)
 		iounmap((void __force __iomem *)vaddr);
@@ -185,3 +169,23 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 		break;
 	}
 }
+
+/*
+ * Plug in coherent or noncoherent dma ops
+ */
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent)
+{
+	/*
+	 * IOC hardware snoops all DMA traffic keeping the caches consistent
+	 * with memory - eliding need for any explicit cache maintenance of
+	 * DMA buffers - so we can use dma_direct cache ops.
+	 */
+	if (is_isa_arcv2() && ioc_enable && coherent) {
+		set_dma_ops(dev, &dma_direct_ops);
+		dev_info(dev, "use dma_direct_ops cache ops\n");
+	} else {
+		set_dma_ops(dev, &dma_noncoherent_ops);
+		dev_info(dev, "use dma_noncoherent_ops cache ops\n");
+	}
+}