Commit dbe8e6a8 authored by Joerg Roedel's avatar Joerg Roedel

Merge branch 'for-joerg/arm-smmu/updates' of...

Merge branch 'for-joerg/arm-smmu/updates' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into arm/smmu
parents d1abaeb3 1554240f
......@@ -1350,8 +1350,7 @@ M: Will Deacon <will@kernel.org>
R: Robin Murphy <robin.murphy@arm.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: drivers/iommu/arm-smmu.c
F: drivers/iommu/arm-smmu-v3.c
F: drivers/iommu/arm-smmu*
F: drivers/iommu/io-pgtable-arm.c
F: drivers/iommu/io-pgtable-arm-v7s.c
......
......@@ -222,7 +222,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
size_t unmapped_page;
size_t pgsize = get_pgsize(iova, len - unmapped_len);
unmapped_page = ops->unmap(ops, iova, pgsize);
unmapped_page = ops->unmap(ops, iova, pgsize, NULL);
if (!unmapped_page)
break;
......@@ -247,20 +247,28 @@ static void mmu_tlb_inv_context_s1(void *cookie)
mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
}
static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
{}
static void mmu_tlb_sync_context(void *cookie)
{
//struct panfrost_device *pfdev = cookie;
// TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X
}
static const struct iommu_gather_ops mmu_tlb_ops = {
static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule,
void *cookie)
{
mmu_tlb_sync_context(cookie);
}
static void mmu_tlb_flush_leaf(unsigned long iova, size_t size, size_t granule,
void *cookie)
{
mmu_tlb_sync_context(cookie);
}
static const struct iommu_flush_ops mmu_tlb_ops = {
.tlb_flush_all = mmu_tlb_inv_context_s1,
.tlb_add_flush = mmu_tlb_inv_range_nosync,
.tlb_sync = mmu_tlb_sync_context,
.tlb_flush_walk = mmu_tlb_flush_walk,
.tlb_flush_leaf = mmu_tlb_flush_leaf,
};
static const char *access_type_name(struct panfrost_device *pfdev,
......
......@@ -13,7 +13,7 @@ obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
......
......@@ -3055,7 +3055,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
}
static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
size_t page_size)
size_t page_size,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
size_t unmap_size;
......@@ -3196,9 +3197,10 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
domain_flush_complete(dom);
}
static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
unsigned long iova, size_t size)
static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
amd_iommu_flush_iotlb_all(domain);
}
const struct iommu_ops amd_iommu_ops = {
......@@ -3219,8 +3221,7 @@ const struct iommu_ops amd_iommu_ops = {
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
.iotlb_range_add = amd_iommu_iotlb_range_add,
.iotlb_sync = amd_iommu_flush_iotlb_all,
.iotlb_sync = amd_iommu_iotlb_sync,
};
/*****************************************************************************
......
// SPDX-License-Identifier: GPL-2.0-only
// Miscellaneous Arm SMMU implementation and integration quirks
// Copyright (C) 2019 Arm Limited
#define pr_fmt(fmt) "arm-smmu: " fmt
#include <linux/bitfield.h>
#include <linux/of.h>
#include "arm-smmu.h"
static int arm_smmu_gr0_ns(int offset)
{
switch(offset) {
case ARM_SMMU_GR0_sCR0:
case ARM_SMMU_GR0_sACR:
case ARM_SMMU_GR0_sGFSR:
case ARM_SMMU_GR0_sGFSYNR0:
case ARM_SMMU_GR0_sGFSYNR1:
case ARM_SMMU_GR0_sGFSYNR2:
return offset + 0x400;
default:
return offset;
}
}
static u32 arm_smmu_read_ns(struct arm_smmu_device *smmu, int page,
int offset)
{
if (page == ARM_SMMU_GR0)
offset = arm_smmu_gr0_ns(offset);
return readl_relaxed(arm_smmu_page(smmu, page) + offset);
}
static void arm_smmu_write_ns(struct arm_smmu_device *smmu, int page,
int offset, u32 val)
{
if (page == ARM_SMMU_GR0)
offset = arm_smmu_gr0_ns(offset);
writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
}
/* Since we don't care for sGFAR, we can do without 64-bit accessors */
static const struct arm_smmu_impl calxeda_impl = {
.read_reg = arm_smmu_read_ns,
.write_reg = arm_smmu_write_ns,
};
struct cavium_smmu {
struct arm_smmu_device smmu;
u32 id_base;
};
static int cavium_cfg_probe(struct arm_smmu_device *smmu)
{
static atomic_t context_count = ATOMIC_INIT(0);
struct cavium_smmu *cs = container_of(smmu, struct cavium_smmu, smmu);
/*
* Cavium CN88xx erratum #27704.
* Ensure ASID and VMID allocation is unique across all SMMUs in
* the system.
*/
cs->id_base = atomic_fetch_add(smmu->num_context_banks, &context_count);
dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
return 0;
}
static int cavium_init_context(struct arm_smmu_domain *smmu_domain)
{
struct cavium_smmu *cs = container_of(smmu_domain->smmu,
struct cavium_smmu, smmu);
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
smmu_domain->cfg.vmid += cs->id_base;
else
smmu_domain->cfg.asid += cs->id_base;
return 0;
}
static const struct arm_smmu_impl cavium_impl = {
.cfg_probe = cavium_cfg_probe,
.init_context = cavium_init_context,
};
static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smmu)
{
struct cavium_smmu *cs;
cs = devm_kzalloc(smmu->dev, sizeof(*cs), GFP_KERNEL);
if (!cs)
return ERR_PTR(-ENOMEM);
cs->smmu = *smmu;
cs->smmu.impl = &cavium_impl;
devm_kfree(smmu->dev, smmu);
return &cs->smmu;
}
#define ARM_MMU500_ACTLR_CPRE (1 << 1)
#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
static int arm_mmu500_reset(struct arm_smmu_device *smmu)
{
u32 reg, major;
int i;
/*
* On MMU-500 r2p0 onwards we need to clear ACR.CACHE_LOCK before
* writes to the context bank ACTLRs will stick. And we just hope that
* Secure has also cleared SACR.CACHE_LOCK for this to take effect...
*/
reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID7);
major = FIELD_GET(ID7_MAJOR, reg);
reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sACR);
if (major >= 2)
reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
/*
* Allow unmatched Stream IDs to allocate bypass
* TLB entries for reduced latency.
*/
reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, reg);
/*
* Disable MMU-500's not-particularly-beneficial next-page
* prefetcher for the sake of errata #841119 and #826419.
*/
for (i = 0; i < smmu->num_context_banks; ++i) {
reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
reg &= ~ARM_MMU500_ACTLR_CPRE;
arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg);
}
return 0;
}
static const struct arm_smmu_impl arm_mmu500_impl = {
.reset = arm_mmu500_reset,
};
struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
{
/*
* We will inevitably have to combine model-specific implementation
* quirks with platform-specific integration quirks, but everything
* we currently support happens to work out as straightforward
* mutually-exclusive assignments.
*/
switch (smmu->model) {
case ARM_MMU500:
smmu->impl = &arm_mmu500_impl;
break;
case CAVIUM_SMMUV2:
return cavium_smmu_impl_init(smmu);
default:
break;
}
if (of_property_read_bool(smmu->dev->of_node,
"calxeda,smmu-secure-config-access"))
smmu->impl = &calxeda_impl;
return smmu;
}
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* IOMMU API for ARM architected SMMU implementations.
*
* Copyright (C) 2013 ARM Limited
*
* Author: Will Deacon <will.deacon@arm.com>
*/
#ifndef _ARM_SMMU_REGS_H
#define _ARM_SMMU_REGS_H
/* Configuration registers */
#define ARM_SMMU_GR0_sCR0 0x0
#define sCR0_CLIENTPD (1 << 0)
#define sCR0_GFRE (1 << 1)
#define sCR0_GFIE (1 << 2)
#define sCR0_EXIDENABLE (1 << 3)
#define sCR0_GCFGFRE (1 << 4)
#define sCR0_GCFGFIE (1 << 5)
#define sCR0_USFCFG (1 << 10)
#define sCR0_VMIDPNE (1 << 11)
#define sCR0_PTM (1 << 12)
#define sCR0_FB (1 << 13)
#define sCR0_VMID16EN (1 << 31)
#define sCR0_BSU_SHIFT 14
#define sCR0_BSU_MASK 0x3
/* Auxiliary Configuration register */
#define ARM_SMMU_GR0_sACR 0x10
/* Identification registers */
#define ARM_SMMU_GR0_ID0 0x20
#define ARM_SMMU_GR0_ID1 0x24
#define ARM_SMMU_GR0_ID2 0x28
#define ARM_SMMU_GR0_ID3 0x2c
#define ARM_SMMU_GR0_ID4 0x30
#define ARM_SMMU_GR0_ID5 0x34
#define ARM_SMMU_GR0_ID6 0x38
#define ARM_SMMU_GR0_ID7 0x3c
#define ARM_SMMU_GR0_sGFSR 0x48
#define ARM_SMMU_GR0_sGFSYNR0 0x50
#define ARM_SMMU_GR0_sGFSYNR1 0x54
#define ARM_SMMU_GR0_sGFSYNR2 0x58
#define ID0_S1TS (1 << 30)
#define ID0_S2TS (1 << 29)
#define ID0_NTS (1 << 28)
#define ID0_SMS (1 << 27)
#define ID0_ATOSNS (1 << 26)
#define ID0_PTFS_NO_AARCH32 (1 << 25)
#define ID0_PTFS_NO_AARCH32S (1 << 24)
#define ID0_CTTW (1 << 14)
#define ID0_NUMIRPT_SHIFT 16
#define ID0_NUMIRPT_MASK 0xff
#define ID0_NUMSIDB_SHIFT 9
#define ID0_NUMSIDB_MASK 0xf
#define ID0_EXIDS (1 << 8)
#define ID0_NUMSMRG_SHIFT 0
#define ID0_NUMSMRG_MASK 0xff
#define ID1_PAGESIZE (1 << 31)
#define ID1_NUMPAGENDXB_SHIFT 28
#define ID1_NUMPAGENDXB_MASK 7
#define ID1_NUMS2CB_SHIFT 16
#define ID1_NUMS2CB_MASK 0xff
#define ID1_NUMCB_SHIFT 0
#define ID1_NUMCB_MASK 0xff
#define ID2_OAS_SHIFT 4
#define ID2_OAS_MASK 0xf
#define ID2_IAS_SHIFT 0
#define ID2_IAS_MASK 0xf
#define ID2_UBS_SHIFT 8
#define ID2_UBS_MASK 0xf
#define ID2_PTFS_4K (1 << 12)
#define ID2_PTFS_16K (1 << 13)
#define ID2_PTFS_64K (1 << 14)
#define ID2_VMID16 (1 << 15)
#define ID7_MAJOR_SHIFT 4
#define ID7_MAJOR_MASK 0xf
/* Global TLB invalidation */
#define ARM_SMMU_GR0_TLBIVMID 0x64
#define ARM_SMMU_GR0_TLBIALLNSNH 0x68
#define ARM_SMMU_GR0_TLBIALLH 0x6c
#define ARM_SMMU_GR0_sTLBGSYNC 0x70
#define ARM_SMMU_GR0_sTLBGSTATUS 0x74
#define sTLBGSTATUS_GSACTIVE (1 << 0)
/* Stream mapping registers */
#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2))
#define SMR_VALID (1 << 31)
#define SMR_MASK_SHIFT 16
#define SMR_ID_SHIFT 0
#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2))
#define S2CR_CBNDX_SHIFT 0
#define S2CR_CBNDX_MASK 0xff
#define S2CR_EXIDVALID (1 << 10)
#define S2CR_TYPE_SHIFT 16
#define S2CR_TYPE_MASK 0x3
enum arm_smmu_s2cr_type {
S2CR_TYPE_TRANS,
S2CR_TYPE_BYPASS,
S2CR_TYPE_FAULT,
};
#define S2CR_PRIVCFG_SHIFT 24
#define S2CR_PRIVCFG_MASK 0x3
enum arm_smmu_s2cr_privcfg {
S2CR_PRIVCFG_DEFAULT,
S2CR_PRIVCFG_DIPAN,
S2CR_PRIVCFG_UNPRIV,
S2CR_PRIVCFG_PRIV,
};
/* Context bank attribute registers */
#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2))
#define CBAR_VMID_SHIFT 0
#define CBAR_VMID_MASK 0xff
#define CBAR_S1_BPSHCFG_SHIFT 8
#define CBAR_S1_BPSHCFG_MASK 3
#define CBAR_S1_BPSHCFG_NSH 3
#define CBAR_S1_MEMATTR_SHIFT 12
#define CBAR_S1_MEMATTR_MASK 0xf
#define CBAR_S1_MEMATTR_WB 0xf
#define CBAR_TYPE_SHIFT 16
#define CBAR_TYPE_MASK 0x3
#define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT)
#define CBAR_IRPTNDX_SHIFT 24
#define CBAR_IRPTNDX_MASK 0xff
#define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2))
#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
#define CBA2R_RW64_32BIT (0 << 0)
#define CBA2R_RW64_64BIT (1 << 0)
#define CBA2R_VMID_SHIFT 16
#define CBA2R_VMID_MASK 0xffff
#define ARM_SMMU_CB_SCTLR 0x0
#define ARM_SMMU_CB_ACTLR 0x4
#define ARM_SMMU_CB_RESUME 0x8
#define ARM_SMMU_CB_TTBCR2 0x10
#define ARM_SMMU_CB_TTBR0 0x20
#define ARM_SMMU_CB_TTBR1 0x28
#define ARM_SMMU_CB_TTBCR 0x30
#define ARM_SMMU_CB_CONTEXTIDR 0x34
#define ARM_SMMU_CB_S1_MAIR0 0x38
#define ARM_SMMU_CB_S1_MAIR1 0x3c
#define ARM_SMMU_CB_PAR 0x50
#define ARM_SMMU_CB_FSR 0x58
#define ARM_SMMU_CB_FAR 0x60
#define ARM_SMMU_CB_FSYNR0 0x68
#define ARM_SMMU_CB_S1_TLBIVA 0x600
#define ARM_SMMU_CB_S1_TLBIASID 0x610
#define ARM_SMMU_CB_S1_TLBIVAL 0x620
#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
#define ARM_SMMU_CB_TLBSYNC 0x7f0
#define ARM_SMMU_CB_TLBSTATUS 0x7f4
#define ARM_SMMU_CB_ATS1PR 0x800
#define ARM_SMMU_CB_ATSR 0x8f0
#define SCTLR_S1_ASIDPNE (1 << 12)
#define SCTLR_CFCFG (1 << 7)
#define SCTLR_CFIE (1 << 6)
#define SCTLR_CFRE (1 << 5)
#define SCTLR_E (1 << 4)
#define SCTLR_AFE (1 << 2)
#define SCTLR_TRE (1 << 1)
#define SCTLR_M (1 << 0)
#define CB_PAR_F (1 << 0)
#define ATSR_ACTIVE (1 << 0)
#define RESUME_RETRY (0 << 0)
#define RESUME_TERMINATE (1 << 0)
#define TTBCR2_SEP_SHIFT 15
#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
#define TTBCR2_AS (1 << 4)
#define TTBRn_ASID_SHIFT 48
#define FSR_MULTI (1 << 31)
#define FSR_SS (1 << 30)
#define FSR_UUT (1 << 8)
#define FSR_ASF (1 << 7)
#define FSR_TLBLKF (1 << 6)
#define FSR_TLBMCF (1 << 5)
#define FSR_EF (1 << 4)
#define FSR_PF (1 << 3)
#define FSR_AFF (1 << 2)
#define FSR_TF (1 << 1)
#define FSR_IGN (FSR_AFF | FSR_ASF | \
FSR_TLBMCF | FSR_TLBLKF)
#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \
FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
#define FSYNR0_WNR (1 << 4)
#endif /* _ARM_SMMU_REGS_H */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -444,13 +444,18 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
size_t iova_off = iova_offset(iovad, dma_addr);
struct iommu_iotlb_gather iotlb_gather;
size_t unmapped;
dma_addr -= iova_off;
size = iova_align(iovad, size + iova_off);
iommu_iotlb_gather_init(&iotlb_gather);
unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
WARN_ON(unmapped != size);
WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
if (!cookie->fq_domain)
iommu_tlb_sync(domain);
iommu_tlb_sync(domain, &iotlb_gather);
iommu_dma_free_iova(cookie, dma_addr, size);
}
......
......@@ -1130,7 +1130,8 @@ static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain
}
static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
unsigned long l_iova, size_t size)
unsigned long l_iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
sysmmu_iova_t iova = (sysmmu_iova_t)l_iova;
......
......@@ -5153,7 +5153,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
}
static size_t intel_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct page *freelist = NULL;
......
......@@ -362,7 +362,8 @@ static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
return false;
}
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *,
struct iommu_iotlb_gather *, unsigned long,
size_t, int, arm_v7s_iopte *);
static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
......@@ -383,7 +384,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
if (WARN_ON(__arm_v7s_unmap(data, iova + i * sz,
if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
sz, lvl, tblp) != sz))
return -EINVAL;
} else if (ptep[i]) {
......@@ -493,9 +494,8 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
* a chance for anything to kick off a table walk for the new iova.
*/
if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
io_pgtable_tlb_add_flush(iop, iova, size,
ARM_V7S_BLOCK_SIZE(2), false);
io_pgtable_tlb_sync(iop);
io_pgtable_tlb_flush_walk(iop, iova, size,
ARM_V7S_BLOCK_SIZE(2));
} else {
wmb();
}
......@@ -541,12 +541,12 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
size *= ARM_V7S_CONT_PAGES;
io_pgtable_tlb_add_flush(iop, iova, size, size, true);
io_pgtable_tlb_sync(iop);
io_pgtable_tlb_flush_leaf(iop, iova, size, size);
return pte;
}
static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size,
arm_v7s_iopte blk_pte,
arm_v7s_iopte *ptep)
......@@ -583,15 +583,15 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
return 0;
tablep = iopte_deref(pte, 1);
return __arm_v7s_unmap(data, iova, size, 2, tablep);
return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
}
io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
io_pgtable_tlb_sync(&data->iop);
io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
return size;
}
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
arm_v7s_iopte *ptep)
{
......@@ -638,9 +638,8 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
for (i = 0; i < num_entries; i++) {
if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
/* Also flush any partial walks */
io_pgtable_tlb_add_flush(iop, iova, blk_size,
ARM_V7S_BLOCK_SIZE(lvl + 1), false);
io_pgtable_tlb_sync(iop);
io_pgtable_tlb_flush_walk(iop, iova, blk_size,
ARM_V7S_BLOCK_SIZE(lvl + 1));
ptep = iopte_deref(pte[i], lvl);
__arm_v7s_free_table(ptep, lvl + 1, data);
} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
......@@ -651,8 +650,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
*/
smp_wmb();
} else {
io_pgtable_tlb_add_flush(iop, iova, blk_size,
blk_size, true);
io_pgtable_tlb_add_page(iop, gather, iova, blk_size);
}
iova += blk_size;
}
......@@ -662,23 +660,24 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
* Insert a table at the next level to map the old region,
* minus the part we want to unmap
*/
return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep);
return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0],
ptep);
}
/* Keep on walkin' */
ptep = iopte_deref(pte[0], lvl);
return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
}
static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
if (WARN_ON(upper_32_bits(iova)))
return 0;
return __arm_v7s_unmap(data, iova, size, 1, data->pgd);
return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
}
static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
......@@ -806,22 +805,24 @@ static void dummy_tlb_flush_all(void *cookie)
WARN_ON(cookie != cfg_cookie);
}
static void dummy_tlb_add_flush(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
static void dummy_tlb_sync(void *cookie)
static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
dummy_tlb_flush(iova, granule, granule, cookie);
}
static const struct iommu_gather_ops dummy_tlb_ops = {
static const struct iommu_flush_ops dummy_tlb_ops = {
.tlb_flush_all = dummy_tlb_flush_all,
.tlb_add_flush = dummy_tlb_add_flush,
.tlb_sync = dummy_tlb_sync,
.tlb_flush_walk = dummy_tlb_flush,
.tlb_flush_leaf = dummy_tlb_flush,
.tlb_add_page = dummy_tlb_add_page,
};
#define __FAIL(ops) ({ \
......@@ -896,7 +897,7 @@ static int __init arm_v7s_do_selftests(void)
size = 1UL << __ffs(cfg.pgsize_bitmap);
while (i < loopnr) {
iova_start = i * SZ_16M;
if (ops->unmap(ops, iova_start + size, size) != size)
if (ops->unmap(ops, iova_start + size, size, NULL) != size)
return __FAIL(ops);
/* Remap of partial unmap */
......@@ -914,7 +915,7 @@ static int __init arm_v7s_do_selftests(void)
for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
size = 1UL << i;
if (ops->unmap(ops, iova, size) != size)
if (ops->unmap(ops, iova, size, NULL) != size)
return __FAIL(ops);
if (ops->iova_to_phys(ops, iova + 42))
......
......@@ -12,7 +12,6 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
......@@ -290,6 +289,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
arm_lpae_iopte *ptep);
......@@ -335,8 +335,10 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
if (WARN_ON(__arm_lpae_unmap(data, iova, sz, lvl, tblp) != sz))
if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
WARN_ON(1);
return -EINVAL;
}
}
__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
......@@ -537,6 +539,7 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
}
static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size,
arm_lpae_iopte blk_pte, int lvl,
arm_lpae_iopte *ptep)
......@@ -582,15 +585,15 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
tablep = iopte_deref(pte, data);
} else if (unmap_idx >= 0) {
io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
io_pgtable_tlb_sync(&data->iop);
io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
return size;
}
return __arm_lpae_unmap(data, iova, size, lvl, tablep);
return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size, int lvl,
arm_lpae_iopte *ptep)
{
......@@ -612,9 +615,8 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
if (!iopte_leaf(pte, lvl, iop->fmt)) {
/* Also flush any partial walks */
io_pgtable_tlb_add_flush(iop, iova, size,
ARM_LPAE_GRANULE(data), false);
io_pgtable_tlb_sync(iop);
io_pgtable_tlb_flush_walk(iop, iova, size,
ARM_LPAE_GRANULE(data));
ptep = iopte_deref(pte, data);
__arm_lpae_free_pgtable(data, lvl + 1, ptep);
} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
......@@ -625,7 +627,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
*/
smp_wmb();
} else {
io_pgtable_tlb_add_flush(iop, iova, size, size, true);
io_pgtable_tlb_add_page(iop, gather, iova, size);
}
return size;
......@@ -634,17 +636,17 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
* Insert a table at the next level to map the old region,
* minus the part we want to unmap
*/
return arm_lpae_split_blk_unmap(data, iova, size, pte,
return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
lvl + 1, ptep);
}
/* Keep on walkin' */
ptep = iopte_deref(pte, data);
return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep);
}
static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte *ptep = data->pgd;
......@@ -653,7 +655,7 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0;
return __arm_lpae_unmap(data, iova, size, lvl, ptep);
return __arm_lpae_unmap(data, gather, iova, size, lvl, ptep);
}
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
......@@ -1070,22 +1072,24 @@ static void dummy_tlb_flush_all(void *cookie)
WARN_ON(cookie != cfg_cookie);
}
static void dummy_tlb_add_flush(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
static void dummy_tlb_sync(void *cookie)
static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
dummy_tlb_flush(iova, granule, granule, cookie);
}
static const struct iommu_gather_ops dummy_tlb_ops __initconst = {
static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
.tlb_flush_all = dummy_tlb_flush_all,
.tlb_add_flush = dummy_tlb_add_flush,
.tlb_sync = dummy_tlb_sync,
.tlb_flush_walk = dummy_tlb_flush,
.tlb_flush_leaf = dummy_tlb_flush,
.tlb_add_page = dummy_tlb_add_page,
};
static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
......@@ -1168,7 +1172,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
/* Partial unmap */
size = 1UL << __ffs(cfg->pgsize_bitmap);
if (ops->unmap(ops, SZ_1G + size, size) != size)
if (ops->unmap(ops, SZ_1G + size, size, NULL) != size)
return __FAIL(ops, i);
/* Remap of partial unmap */
......@@ -1183,7 +1187,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
size = 1UL << j;
if (ops->unmap(ops, iova, size) != size)
if (ops->unmap(ops, iova, size, NULL) != size)
return __FAIL(ops, i);
if (ops->iova_to_phys(ops, iova + 42))
......
......@@ -1862,7 +1862,7 @@ EXPORT_SYMBOL_GPL(iommu_map);
static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
bool sync)
struct iommu_iotlb_gather *iotlb_gather)
{
const struct iommu_ops *ops = domain->ops;
size_t unmapped_page, unmapped = 0;
......@@ -1899,13 +1899,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
while (unmapped < size) {
size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
unmapped_page = ops->unmap(domain, iova, pgsize);
unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
if (!unmapped_page)
break;
if (sync && ops->iotlb_range_add)
ops->iotlb_range_add(domain, iova, pgsize);
pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
iova, unmapped_page);
......@@ -1913,9 +1910,6 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
unmapped += unmapped_page;
}
if (sync && ops->iotlb_sync)
ops->iotlb_sync(domain);
trace_unmap(orig_iova, size, unmapped);
return unmapped;
}
......@@ -1923,14 +1917,22 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
size_t iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
return __iommu_unmap(domain, iova, size, true);
struct iommu_iotlb_gather iotlb_gather;
size_t ret;
iommu_iotlb_gather_init(&iotlb_gather);
ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
iommu_tlb_sync(domain, &iotlb_gather);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_unmap);
size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
{
return __iommu_unmap(domain, iova, size, false);
return __iommu_unmap(domain, iova, size, iotlb_gather);
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);
......
......@@ -361,16 +361,16 @@ static void ipmmu_tlb_flush_all(void *cookie)
ipmmu_tlb_invalidate(domain);
}
static void ipmmu_tlb_add_flush(unsigned long iova, size_t size,
size_t granule, bool leaf, void *cookie)
static void ipmmu_tlb_flush(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
/* The hardware doesn't support selective TLB flush. */
ipmmu_tlb_flush_all(cookie);
}
static const struct iommu_gather_ops ipmmu_gather_ops = {
static const struct iommu_flush_ops ipmmu_flush_ops = {
.tlb_flush_all = ipmmu_tlb_flush_all,
.tlb_add_flush = ipmmu_tlb_add_flush,
.tlb_sync = ipmmu_tlb_flush_all,
.tlb_flush_walk = ipmmu_tlb_flush,
.tlb_flush_leaf = ipmmu_tlb_flush,
};
/* -----------------------------------------------------------------------------
......@@ -480,7 +480,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
domain->cfg.ias = 32;
domain->cfg.oas = 40;
domain->cfg.tlb = &ipmmu_gather_ops;
domain->cfg.tlb = &ipmmu_flush_ops;
domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32);
domain->io_domain.geometry.force_aperture = true;
/*
......@@ -733,14 +733,14 @@ static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
}
static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
return domain->iop->unmap(domain->iop, iova, size);
return domain->iop->unmap(domain->iop, iova, size, gather);
}
static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain)
{
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
......@@ -748,6 +748,12 @@ static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
ipmmu_tlb_flush_all(domain);
}
static void ipmmu_iotlb_sync(struct iommu_domain *io_domain,
struct iommu_iotlb_gather *gather)
{
ipmmu_flush_iotlb_all(io_domain);
}
static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
dma_addr_t iova)
{
......@@ -957,7 +963,7 @@ static const struct iommu_ops ipmmu_ops = {
.detach_dev = ipmmu_detach_device,
.map = ipmmu_map,
.unmap = ipmmu_unmap,
.flush_iotlb_all = ipmmu_iotlb_sync,
.flush_iotlb_all = ipmmu_flush_iotlb_all,
.iotlb_sync = ipmmu_iotlb_sync,
.iova_to_phys = ipmmu_iova_to_phys,
.add_device = ipmmu_add_device,
......
......@@ -168,20 +168,29 @@ static void __flush_iotlb_range(unsigned long iova, size_t size,
return;
}
static void __flush_iotlb_sync(void *cookie)
static void __flush_iotlb_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
/*
* Nothing is needed here, the barrier to guarantee
* completion of the tlb sync operation is implicitly
* taken care when the iommu client does a writel before
* kick starting the other master.
*/
__flush_iotlb_range(iova, size, granule, false, cookie);
}
static void __flush_iotlb_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
__flush_iotlb_range(iova, size, granule, true, cookie);
}
static const struct iommu_gather_ops msm_iommu_gather_ops = {
static void __flush_iotlb_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule, void *cookie)
{
__flush_iotlb_range(iova, granule, granule, true, cookie);
}
static const struct iommu_flush_ops msm_iommu_flush_ops = {
.tlb_flush_all = __flush_iotlb,
.tlb_add_flush = __flush_iotlb_range,
.tlb_sync = __flush_iotlb_sync,
.tlb_flush_walk = __flush_iotlb_walk,
.tlb_flush_leaf = __flush_iotlb_leaf,
.tlb_add_page = __flush_iotlb_page,
};
static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end)
......@@ -345,7 +354,7 @@ static int msm_iommu_domain_config(struct msm_priv *priv)
.pgsize_bitmap = msm_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 32,
.tlb = &msm_iommu_gather_ops,
.tlb = &msm_iommu_flush_ops,
.iommu_dev = priv->dev,
};
......@@ -509,13 +518,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t len)
size_t len, struct iommu_iotlb_gather *gather)
{
struct msm_priv *priv = to_msm_priv(domain);
unsigned long flags;
spin_lock_irqsave(&priv->pgtlock, flags);
len = priv->iop->unmap(priv->iop, iova, len);
len = priv->iop->unmap(priv->iop, iova, len, gather);
spin_unlock_irqrestore(&priv->pgtlock, flags);
return len;
......@@ -691,6 +700,13 @@ static struct iommu_ops msm_iommu_ops = {
.detach_dev = msm_iommu_detach_dev,
.map = msm_iommu_map,
.unmap = msm_iommu_unmap,
/*
* Nothing is needed here, the barrier to guarantee
* completion of the tlb sync operation is implicitly
* taken care when the iommu client does a writel before
* kick starting the other master.
*/
.iotlb_sync = NULL,
.iova_to_phys = msm_iommu_iova_to_phys,
.add_device = msm_iommu_add_device,
.remove_device = msm_iommu_remove_device,
......
......@@ -188,10 +188,32 @@ static void mtk_iommu_tlb_sync(void *cookie)
}
}
static const struct iommu_gather_ops mtk_iommu_gather_ops = {
static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
mtk_iommu_tlb_sync(cookie);
}
static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
mtk_iommu_tlb_sync(cookie);
}
static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
}
static const struct iommu_flush_ops mtk_iommu_flush_ops = {
.tlb_flush_all = mtk_iommu_tlb_flush_all,
.tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
.tlb_sync = mtk_iommu_tlb_sync,
.tlb_flush_walk = mtk_iommu_tlb_flush_walk,
.tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
.tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
};
static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
......@@ -267,7 +289,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 32,
.tlb = &mtk_iommu_gather_ops,
.tlb = &mtk_iommu_flush_ops,
.iommu_dev = data->dev,
};
......@@ -371,20 +393,27 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t mtk_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
unsigned long flags;
size_t unmapsz;
spin_lock_irqsave(&dom->pgtlock, flags);
unmapsz = dom->iop->unmap(dom->iop, iova, size);
unmapsz = dom->iop->unmap(dom->iop, iova, size, gather);
spin_unlock_irqrestore(&dom->pgtlock, flags);
return unmapsz;
}
static void mtk_iommu_iotlb_sync(struct iommu_domain *domain)
static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
}
static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
}
......@@ -490,7 +519,7 @@ static const struct iommu_ops mtk_iommu_ops = {
.detach_dev = mtk_iommu_detach_device,
.map = mtk_iommu_map,
.unmap = mtk_iommu_unmap,
.flush_iotlb_all = mtk_iommu_iotlb_sync,
.flush_iotlb_all = mtk_iommu_flush_iotlb_all,
.iotlb_sync = mtk_iommu_iotlb_sync,
.iova_to_phys = mtk_iommu_iova_to_phys,
.add_device = mtk_iommu_add_device,
......
......@@ -324,7 +324,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t mtk_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
unsigned long flags;
......
......@@ -1149,7 +1149,7 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
}
static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct device *dev = omap_domain->dev;
......
......@@ -7,6 +7,7 @@
*/
#include <linux/atomic.h>
#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
......@@ -32,7 +33,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "arm-smmu-regs.h"
#include "arm-smmu.h"
#define SMMU_INTR_SEL_NS 0x2000
......@@ -155,7 +156,7 @@ static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size,
struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]);
size_t s = size;
iova &= ~12UL;
iova = (iova >> 12) << 12;
iova |= ctx->asid;
do {
iommu_writel(ctx, reg, iova);
......@@ -164,10 +165,32 @@ static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size,
}
}
static const struct iommu_gather_ops qcom_gather_ops = {
static void qcom_iommu_tlb_flush_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
qcom_iommu_tlb_inv_range_nosync(iova, size, granule, false, cookie);
qcom_iommu_tlb_sync(cookie);
}
static void qcom_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
qcom_iommu_tlb_inv_range_nosync(iova, size, granule, true, cookie);
qcom_iommu_tlb_sync(cookie);
}
static void qcom_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
qcom_iommu_tlb_inv_range_nosync(iova, granule, granule, true, cookie);
}
static const struct iommu_flush_ops qcom_flush_ops = {
.tlb_flush_all = qcom_iommu_tlb_inv_context,
.tlb_add_flush = qcom_iommu_tlb_inv_range_nosync,
.tlb_sync = qcom_iommu_tlb_sync,
.tlb_flush_walk = qcom_iommu_tlb_flush_walk,
.tlb_flush_leaf = qcom_iommu_tlb_flush_leaf,
.tlb_add_page = qcom_iommu_tlb_add_page,
};
static irqreturn_t qcom_iommu_fault(int irq, void *dev)
......@@ -215,7 +238,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
.pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap,
.ias = 32,
.oas = 40,
.tlb = &qcom_gather_ops,
.tlb = &qcom_flush_ops,
.iommu_dev = qcom_iommu->dev,
};
......@@ -247,16 +270,16 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
/* TTBRs */
iommu_writeq(ctx, ARM_SMMU_CB_TTBR0,
pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] |
((u64)ctx->asid << TTBRn_ASID_SHIFT));
FIELD_PREP(TTBRn_ASID, ctx->asid));
iommu_writeq(ctx, ARM_SMMU_CB_TTBR1,
pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] |
((u64)ctx->asid << TTBRn_ASID_SHIFT));
FIELD_PREP(TTBRn_ASID, ctx->asid));
/* TTBCR */
iommu_writel(ctx, ARM_SMMU_CB_TTBCR2,
/* TCR */
iommu_writel(ctx, ARM_SMMU_CB_TCR2,
(pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) |
TTBCR2_SEP_UPSTREAM);
iommu_writel(ctx, ARM_SMMU_CB_TTBCR,
FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM));
iommu_writel(ctx, ARM_SMMU_CB_TCR,
pgtbl_cfg.arm_lpae_s1_cfg.tcr);
/* MAIRs (stage-1 only) */
......@@ -417,7 +440,7 @@ static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
size_t ret;
unsigned long flags;
......@@ -434,14 +457,14 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
*/
pm_runtime_get_sync(qcom_domain->iommu->dev);
spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags);
ret = ops->unmap(ops, iova, size);
ret = ops->unmap(ops, iova, size, gather);
spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags);
pm_runtime_put_sync(qcom_domain->iommu->dev);
return ret;
}
static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
static void qcom_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops,
......@@ -454,6 +477,12 @@ static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
pm_runtime_put_sync(qcom_domain->iommu->dev);
}
static void qcom_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
qcom_iommu_flush_iotlb_all(domain);
}
static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
......@@ -581,7 +610,7 @@ static const struct iommu_ops qcom_iommu_ops = {
.detach_dev = qcom_iommu_detach_dev,
.map = qcom_iommu_map,
.unmap = qcom_iommu_unmap,
.flush_iotlb_all = qcom_iommu_iotlb_sync,
.flush_iotlb_all = qcom_iommu_flush_iotlb_all,
.iotlb_sync = qcom_iommu_iotlb_sync,
.iova_to_phys = qcom_iommu_iova_to_phys,
.add_device = qcom_iommu_add_device,
......
......@@ -794,7 +794,7 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
}
static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
......
......@@ -314,7 +314,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
}
static size_t s390_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
unsigned long iova, size_t size,
struct iommu_iotlb_gather *gather)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
int flags = ZPCI_PTE_INVALID;
......
......@@ -207,7 +207,7 @@ static inline int __gart_iommu_unmap(struct gart_device *gart,
}
static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t bytes)
size_t bytes, struct iommu_iotlb_gather *gather)
{
struct gart_device *gart = gart_handle;
int err;
......@@ -273,11 +273,17 @@ static int gart_iommu_of_xlate(struct device *dev,
return 0;
}
static void gart_iommu_sync(struct iommu_domain *domain)
static void gart_iommu_sync_map(struct iommu_domain *domain)
{
FLUSH_GART_REGS(gart_handle);
}
static void gart_iommu_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
gart_iommu_sync_map(domain);
}
static const struct iommu_ops gart_iommu_ops = {
.capable = gart_iommu_capable,
.domain_alloc = gart_iommu_domain_alloc,
......@@ -292,7 +298,7 @@ static const struct iommu_ops gart_iommu_ops = {
.iova_to_phys = gart_iommu_iova_to_phys,
.pgsize_bitmap = GART_IOMMU_PGSIZES,
.of_xlate = gart_iommu_of_xlate,
.iotlb_sync_map = gart_iommu_sync,
.iotlb_sync_map = gart_iommu_sync_map,
.iotlb_sync = gart_iommu_sync,
};
......
......@@ -680,7 +680,7 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
dma_addr_t pte_dma;
......
......@@ -751,7 +751,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova,
}
static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
size_t size, struct iommu_iotlb_gather *gather)
{
int ret = 0;
size_t unmapped;
......@@ -797,7 +797,8 @@ static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
return paddr;
}
static void viommu_iotlb_sync(struct iommu_domain *domain)
static void viommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
struct viommu_domain *vdomain = to_viommu_domain(domain);
......
......@@ -650,12 +650,13 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
}
static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
struct list_head *regions)
struct list_head *regions,
struct iommu_iotlb_gather *iotlb_gather)
{
long unlocked = 0;
struct vfio_regions *entry, *next;
iommu_tlb_sync(domain->domain);
iommu_tlb_sync(domain->domain, iotlb_gather);
list_for_each_entry_safe(entry, next, regions, list) {
unlocked += vfio_unpin_pages_remote(dma,
......@@ -685,18 +686,19 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain,
struct vfio_dma *dma, dma_addr_t *iova,
size_t len, phys_addr_t phys, long *unlocked,
struct list_head *unmapped_list,
int *unmapped_cnt)
int *unmapped_cnt,
struct iommu_iotlb_gather *iotlb_gather)
{
size_t unmapped = 0;
struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (entry) {
unmapped = iommu_unmap_fast(domain->domain, *iova, len);
unmapped = iommu_unmap_fast(domain->domain, *iova, len,
iotlb_gather);
if (!unmapped) {
kfree(entry);
} else {
iommu_tlb_range_add(domain->domain, *iova, unmapped);
entry->iova = *iova;
entry->phys = phys;
entry->len = unmapped;
......@@ -712,8 +714,8 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain,
* or in case of errors.
*/
if (*unmapped_cnt >= VFIO_IOMMU_TLB_SYNC_MAX || !unmapped) {
*unlocked += vfio_sync_unpin(dma, domain,
unmapped_list);
*unlocked += vfio_sync_unpin(dma, domain, unmapped_list,
iotlb_gather);
*unmapped_cnt = 0;
}
......@@ -744,6 +746,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
struct vfio_domain *domain, *d;
LIST_HEAD(unmapped_region_list);
struct iommu_iotlb_gather iotlb_gather;
int unmapped_region_cnt = 0;
long unlocked = 0;
......@@ -768,6 +771,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
cond_resched();
}
iommu_iotlb_gather_init(&iotlb_gather);
while (iova < end) {
size_t unmapped, len;
phys_addr_t phys, next;
......@@ -796,7 +800,8 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
*/
unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys,
&unlocked, &unmapped_region_list,
&unmapped_region_cnt);
&unmapped_region_cnt,
&iotlb_gather);
if (!unmapped) {
unmapped = unmap_unpin_slow(domain, dma, &iova, len,
phys, &unlocked);
......@@ -807,8 +812,10 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
dma->iommu_mapped = false;
if (unmapped_region_cnt)
unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list);
if (unmapped_region_cnt) {
unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list,
&iotlb_gather);
}
if (do_accounting) {
vfio_lock_acct(dma, -unlocked, true);
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IO_PGTABLE_H
#define __IO_PGTABLE_H
#include <linux/bitops.h>
#include <linux/iommu.h>
/*
* Public API for use by IOMMU drivers
......@@ -17,22 +19,31 @@ enum io_pgtable_fmt {
};
/**
* struct iommu_gather_ops - IOMMU callbacks for TLB and page table management.
* struct iommu_flush_ops - IOMMU callbacks for TLB and page table management.
*
* @tlb_flush_all: Synchronously invalidate the entire TLB context.
* @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
* @tlb_sync: Ensure any queued TLB invalidation has taken effect, and
* any corresponding page table updates are visible to the
* IOMMU.
* @tlb_flush_all: Synchronously invalidate the entire TLB context.
* @tlb_flush_walk: Synchronously invalidate all intermediate TLB state
* (sometimes referred to as the "walk cache") for a virtual
* address range.
* @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual
* address range.
* @tlb_add_page: Optional callback to queue up leaf TLB invalidation for a
* single page. IOMMUs that cannot batch TLB invalidation
* operations efficiently will typically issue them here, but
* others may decide to update the iommu_iotlb_gather structure
* and defer the invalidation until iommu_tlb_sync() instead.
*
* Note that these can all be called in atomic context and must therefore
* not block.
*/
struct iommu_gather_ops {
struct iommu_flush_ops {
void (*tlb_flush_all)(void *cookie);
void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
bool leaf, void *cookie);
void (*tlb_sync)(void *cookie);
void (*tlb_flush_walk)(unsigned long iova, size_t size, size_t granule,
void *cookie);
void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule,
void *cookie);
void (*tlb_add_page)(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule, void *cookie);
};
/**
......@@ -84,7 +95,7 @@ struct io_pgtable_cfg {
unsigned int ias;
unsigned int oas;
bool coherent_walk;
const struct iommu_gather_ops *tlb;
const struct iommu_flush_ops *tlb;
struct device *iommu_dev;
/* Low-level data specific to the table format */
......@@ -128,7 +139,7 @@ struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size);
size_t size, struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
};
......@@ -184,15 +195,27 @@ static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
iop->cfg.tlb->tlb_flush_all(iop->cookie);
}
static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
unsigned long iova, size_t size, size_t granule, bool leaf)
static inline void
io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova,
size_t size, size_t granule)
{
iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);
}
static inline void
io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova,
size_t size, size_t granule)
{
iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);
}
static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
static inline void
io_pgtable_tlb_add_page(struct io_pgtable *iop,
struct iommu_iotlb_gather * gather, unsigned long iova,
size_t granule)
{
iop->cfg.tlb->tlb_sync(iop->cookie);
if (iop->cfg.tlb->tlb_add_page)
iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie);
}
/**
......
......@@ -191,6 +191,23 @@ struct iommu_sva_ops {
#ifdef CONFIG_IOMMU_API
/**
* struct iommu_iotlb_gather - Range information for a pending IOTLB flush
*
* @start: IOVA representing the start of the range to be flushed
* @end: IOVA representing the end of the range to be flushed (exclusive)
* @pgsize: The interval at which to perform the flush
*
* This structure is intended to be updated by multiple calls to the
* ->unmap() function in struct iommu_ops before eventually being passed
* into ->iotlb_sync().
*/
struct iommu_iotlb_gather {
unsigned long start;
unsigned long end;
size_t pgsize;
};
/**
* struct iommu_ops - iommu ops and capabilities
* @capable: check capability
......@@ -201,7 +218,6 @@ struct iommu_sva_ops {
* @map: map a physically contiguous memory region to an iommu domain
* @unmap: unmap a physically contiguous memory region from an iommu domain
* @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
* @iotlb_range_add: Add a given iova range to the flush queue for this domain
* @iotlb_sync_map: Sync mappings created recently using @map to the hardware
* @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
* queue
......@@ -242,12 +258,11 @@ struct iommu_ops {
int (*map)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
size_t size);
size_t size, struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
void (*iotlb_range_add)(struct iommu_domain *domain,
unsigned long iova, size_t size);
void (*iotlb_sync_map)(struct iommu_domain *domain);
void (*iotlb_sync)(struct iommu_domain *domain);
void (*iotlb_sync)(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather);
phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
int (*add_device)(struct device *dev);
void (*remove_device)(struct device *dev);
......@@ -378,6 +393,13 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
return (struct iommu_device *)dev_get_drvdata(dev);
}
static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
{
*gather = (struct iommu_iotlb_gather) {
.start = ULONG_MAX,
};
}
#define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */
#define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */
#define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */
......@@ -402,7 +424,8 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size);
extern size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size);
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather);
extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg,unsigned int nents, int prot);
extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
......@@ -476,17 +499,38 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
domain->ops->flush_iotlb_all(domain);
}
static inline void iommu_tlb_range_add(struct iommu_domain *domain,
unsigned long iova, size_t size)
static inline void iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather)
{
if (domain->ops->iotlb_range_add)
domain->ops->iotlb_range_add(domain, iova, size);
if (domain->ops->iotlb_sync)
domain->ops->iotlb_sync(domain, iotlb_gather);
iommu_iotlb_gather_init(iotlb_gather);
}
static inline void iommu_tlb_sync(struct iommu_domain *domain)
static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size)
{
if (domain->ops->iotlb_sync)
domain->ops->iotlb_sync(domain);
unsigned long start = iova, end = start + size;
/*
* If the new page is disjoint from the current range or is mapped at
* a different granularity, then sync the TLB so that the gather
* structure can be rewritten.
*/
if (gather->pgsize != size ||
end < gather->start || start > gather->end) {
if (gather->pgsize)
iommu_tlb_sync(domain, gather);
gather->pgsize = size;
}
if (gather->end < end)
gather->end = end;
if (gather->start > start)
gather->start = start;
}
/* PCI device grouping function */
......@@ -567,6 +611,7 @@ struct iommu_group {};
struct iommu_fwspec {};
struct iommu_device {};
struct iommu_fault_param {};
struct iommu_iotlb_gather {};
static inline bool iommu_present(struct bus_type *bus)
{
......@@ -621,7 +666,8 @@ static inline size_t iommu_unmap(struct iommu_domain *domain,
}
static inline size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, int gfp_order)
unsigned long iova, int gfp_order,
struct iommu_iotlb_gather *iotlb_gather)
{
return 0;
}
......@@ -637,12 +683,8 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
{
}
static inline void iommu_tlb_range_add(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
}
static inline void iommu_tlb_sync(struct iommu_domain *domain)
static inline void iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather)
{
}
......@@ -827,6 +869,16 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
return NULL;
}
static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
{
}
static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size)
{
}
static inline void iommu_device_unregister(struct iommu_device *iommu)
{
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment