Commit 0cc6f45c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'iommu-updates-v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu updates from Joerg Roedel:
 "Core:
   - IOMMU memory usage observability - This will make the memory used
     for IO page tables explicitly visible.
   - Simplify arch_setup_dma_ops()

  Intel VT-d:
   - Consolidate domain cache invalidation
   - Remove private data from page fault message
   - Allocate DMAR fault interrupts locally
   - Cleanup and refactoring

  ARM-SMMUv2:
   - Support for fault debugging hardware on Qualcomm implementations
   - Re-land support for the ->domain_alloc_paging() callback

  ARM-SMMUv3:
   - Improve handling of MSI allocation failure
   - Drop support for the "disable_bypass" cmdline option
   - Major rework of the CD creation code, following on directly from
     the STE rework merged last time around.
   - Add unit tests for the new STE/CD manipulation logic

  AMD-Vi:
   - Final part of SVA changes with generic IO page fault handling

  Renesas IPMMU:
   - Add support for R8A779H0 hardware

  ... and a couple smaller fixes and updates across the sub-tree"

* tag 'iommu-updates-v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (80 commits)
  iommu/arm-smmu-v3: Make the kunit into a module
  arm64: Properly clean up iommu-dma remnants
  iommu/amd: Enable Guest Translation after reading IOMMU feature register
  iommu/vt-d: Decouple igfx_off from graphic identity mapping
  iommu/amd: Fix compilation error
  iommu/arm-smmu-v3: Add unit tests for arm_smmu_write_entry
  iommu/arm-smmu-v3: Build the whole CD in arm_smmu_make_s1_cd()
  iommu/arm-smmu-v3: Move the CD generation for SVA into a function
  iommu/arm-smmu-v3: Allocate the CD table entry in advance
  iommu/arm-smmu-v3: Make arm_smmu_alloc_cd_ptr()
  iommu/arm-smmu-v3: Consolidate clearing a CD table entry
  iommu/arm-smmu-v3: Move the CD generation for S1 domains into a function
  iommu/arm-smmu-v3: Make CD programming use arm_smmu_write_entry()
  iommu/arm-smmu-v3: Add an ops indirection to the STE code
  iommu/arm-smmu-qcom: Don't build debug features as a kernel module
  iommu/amd: Add SVA domain support
  iommu: Add ops->domain_alloc_sva()
  iommu/amd: Initial SVA support for AMD IOMMU
  iommu/amd: Add support for enable/disable IOPF
  iommu/amd: Add IO page fault notifier handler
  ...
parents f0cd69b8 2bd5059c
......@@ -1435,7 +1435,7 @@ PAGE_SIZE multiple when read back.
sec_pagetables
Amount of memory allocated for secondary page tables,
this currently includes KVM mmu allocations on x86
and arm64.
and arm64 and IOMMU page tables.
percpu (npn)
Amount of memory used for storing per-cpu kernel
......
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/iommu/qcom,tbu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm TBU (Translation Buffer Unit)
maintainers:
- Georgi Djakov <quic_c_gdjako@quicinc.com>
description:
The Qualcomm SMMU500 implementation consists of TCU and TBU. The TBU contains
a Translation Lookaside Buffer (TLB) that caches page tables. TBUs provides
debug features to trace and trigger debug transactions. There are multiple TBU
instances with each client core.
properties:
compatible:
enum:
- qcom,sc7280-tbu
- qcom,sdm845-tbu
reg:
maxItems: 1
clocks:
maxItems: 1
interconnects:
maxItems: 1
power-domains:
maxItems: 1
qcom,stream-id-range:
description: |
Phandle of a SMMU device and Stream ID range (address and size) that
is assigned by the TBU
$ref: /schemas/types.yaml#/definitions/phandle-array
items:
- items:
- description: phandle of a smmu node
- description: stream id base address
- description: stream id size
required:
- compatible
- reg
- qcom,stream-id-range
additionalProperties: false
examples:
- |
#include <dt-bindings/clock/qcom,gcc-sdm845.h>
#include <dt-bindings/interconnect/qcom,icc.h>
#include <dt-bindings/interconnect/qcom,sdm845.h>
tbu@150e1000 {
compatible = "qcom,sdm845-tbu";
reg = <0x150e1000 0x1000>;
clocks = <&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>;
interconnects = <&system_noc MASTER_GNOC_SNOC QCOM_ICC_TAG_ACTIVE_ONLY
&config_noc SLAVE_IMEM_CFG QCOM_ICC_TAG_ACTIVE_ONLY>;
power-domains = <&gcc HLOS1_VOTE_AGGRE_NOC_MMU_PCIE_TBU_GDSC>;
qcom,stream-id-range = <&apps_smmu 0x1c00 0x400>;
};
...
......@@ -50,6 +50,7 @@ properties:
- renesas,ipmmu-r8a779a0 # R-Car V3U
- renesas,ipmmu-r8a779f0 # R-Car S4-8
- renesas,ipmmu-r8a779g0 # R-Car V4H
- renesas,ipmmu-r8a779h0 # R-Car V4M
- const: renesas,rcar-gen4-ipmmu-vmsa # R-Car Gen4
reg:
......
......@@ -1110,8 +1110,8 @@ KernelStack
PageTables
Memory consumed by userspace page tables
SecPageTables
Memory consumed by secondary page tables, this currently
currently includes KVM mmu allocations on x86 and arm64.
Memory consumed by secondary page tables, this currently includes
KVM mmu and IOMMU allocations on x86 and arm64.
NFS_Unstable
Always zero. Previous counted pages which had been written to
the server, but has not been committed to stable storage.
......
......@@ -90,8 +90,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
/*
* Plug in direct dma map ops.
*/
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
bool coherent)
void arch_setup_dma_ops(struct device *dev, bool coherent)
{
/*
* IOC hardware snoops all DMA traffic keeping the caches consistent
......
......@@ -33,8 +33,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
}
}
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
bool coherent)
void arch_setup_dma_ops(struct device *dev, bool coherent)
{
if (IS_ENABLED(CONFIG_CPU_V7M)) {
/*
......
......@@ -1709,11 +1709,15 @@ void arm_iommu_detach_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
bool coherent)
static void arm_setup_iommu_dma_ops(struct device *dev)
{
struct dma_iommu_mapping *mapping;
u64 dma_base = 0, size = 1ULL << 32;
if (dev->dma_range_map) {
dma_base = dma_range_map_min(dev->dma_range_map);
size = dma_range_map_max(dev->dma_range_map) - dma_base;
}
mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
if (IS_ERR(mapping)) {
pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
......@@ -1744,8 +1748,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
#else
static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
bool coherent)
static void arm_setup_iommu_dma_ops(struct device *dev)
{
}
......@@ -1753,8 +1756,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { }
#endif /* CONFIG_ARM_DMA_USE_IOMMU */
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
bool coherent)
void arch_setup_dma_ops(struct device *dev, bool coherent)
{
/*
* Due to legacy code that sets the ->dma_coherent flag from a bus
......@@ -1774,7 +1776,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
return;
if (device_iommu_mapped(dev))
arm_setup_iommu_dma_ops(dev, dma_base, size, coherent);
arm_setup_iommu_dma_ops(dev);
xen_setup_dma_ops(dev);
dev->archdata.dma_ops_setup = true;
......
......@@ -46,7 +46,6 @@ config ARM64
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_ZONE_DMA_SET if EXPERT
select ARCH_HAVE_ELF_PROT
......
......@@ -7,7 +7,6 @@
#include <linux/gfp.h>
#include <linux/cache.h>
#include <linux/dma-map-ops.h>
#include <linux/iommu.h>
#include <xen/xen.h>
#include <asm/cacheflush.h>
......@@ -39,15 +38,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
dcache_clean_poc(start, start + size);
}
#ifdef CONFIG_IOMMU_DMA
void arch_teardown_dma_ops(struct device *dev)
{
dev->dma_ops = NULL;
}
#endif
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
bool coherent)
void arch_setup_dma_ops(struct device *dev, bool coherent)
{
int cls = cache_line_size_of_cpu();
......@@ -58,8 +49,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
ARCH_DMA_MINALIGN, cls);
dev->dma_coherent = coherent;
if (device_iommu_mapped(dev))
iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
xen_setup_dma_ops(dev);
}
......@@ -8,17 +8,12 @@
void acpi_arch_dma_setup(struct device *dev)
{
int ret;
u64 mask, end = 0;
u64 mask, end;
const struct bus_dma_region *map = NULL;
ret = acpi_dma_get_range(dev, &map);
if (!ret && map) {
const struct bus_dma_region *r = map;
for (end = 0; r->size; r++) {
if (r->dma_start + r->size - 1 > end)
end = r->dma_start + r->size - 1;
}
end = dma_range_map_max(map);
mask = DMA_BIT_MASK(ilog2(end) + 1);
dev->bus_dma_limit = end;
......
......@@ -137,8 +137,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
#endif
#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
bool coherent)
void arch_setup_dma_ops(struct device *dev, bool coherent)
{
dev->dma_coherent = coherent;
}
......
......@@ -128,8 +128,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
ALT_CMO_OP(FLUSH, flush_addr, size, riscv_cbom_block_size);
}
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
bool coherent)
void arch_setup_dma_ops(struct device *dev, bool coherent)
{
WARN_TAINT(!coherent && riscv_cbom_block_size > ARCH_DMA_MINALIGN,
TAINT_CPU_OUT_OF_SPEC,
......
......@@ -8,7 +8,6 @@ void acpi_arch_dma_setup(struct device *dev)
{
int ret;
u64 end, mask;
u64 size = 0;
const struct bus_dma_region *map = NULL;
/*
......@@ -23,31 +22,23 @@ void acpi_arch_dma_setup(struct device *dev)
}
if (dev->coherent_dma_mask)
size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
end = dev->coherent_dma_mask;
else
size = 1ULL << 32;
end = (1ULL << 32) - 1;
ret = acpi_dma_get_range(dev, &map);
if (!ret && map) {
const struct bus_dma_region *r = map;
for (end = 0; r->size; r++) {
if (r->dma_start + r->size - 1 > end)
end = r->dma_start + r->size - 1;
}
size = end + 1;
end = dma_range_map_max(map);
dev->dma_range_map = map;
}
if (ret == -ENODEV)
ret = iort_dma_get_ranges(dev, &size);
ret = iort_dma_get_ranges(dev, &end);
if (!ret) {
/*
* Limit coherent and dma mask based on size retrieved from
* firmware.
*/
end = size - 1;
mask = DMA_BIT_MASK(ilog2(end) + 1);
dev->bus_dma_limit = end;
dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask);
......
......@@ -1367,7 +1367,7 @@ int iort_iommu_configure_id(struct device *dev, const u32 *input_id)
{ return -ENODEV; }
#endif
static int nc_dma_get_range(struct device *dev, u64 *size)
static int nc_dma_get_range(struct device *dev, u64 *limit)
{
struct acpi_iort_node *node;
struct acpi_iort_named_component *ncomp;
......@@ -1384,13 +1384,13 @@ static int nc_dma_get_range(struct device *dev, u64 *size)
return -EINVAL;
}
*size = ncomp->memory_address_limit >= 64 ? U64_MAX :
1ULL<<ncomp->memory_address_limit;
*limit = ncomp->memory_address_limit >= 64 ? U64_MAX :
(1ULL << ncomp->memory_address_limit) - 1;
return 0;
}
static int rc_dma_get_range(struct device *dev, u64 *size)
static int rc_dma_get_range(struct device *dev, u64 *limit)
{
struct acpi_iort_node *node;
struct acpi_iort_root_complex *rc;
......@@ -1408,8 +1408,8 @@ static int rc_dma_get_range(struct device *dev, u64 *size)
return -EINVAL;
}
*size = rc->memory_address_limit >= 64 ? U64_MAX :
1ULL<<rc->memory_address_limit;
*limit = rc->memory_address_limit >= 64 ? U64_MAX :
(1ULL << rc->memory_address_limit) - 1;
return 0;
}
......@@ -1417,16 +1417,16 @@ static int rc_dma_get_range(struct device *dev, u64 *size)
/**
* iort_dma_get_ranges() - Look up DMA addressing limit for the device
* @dev: device to lookup
* @size: DMA range size result pointer
* @limit: DMA limit result pointer
*
* Return: 0 on success, an error otherwise.
*/
int iort_dma_get_ranges(struct device *dev, u64 *size)
int iort_dma_get_ranges(struct device *dev, u64 *limit)
{
if (dev_is_pci(dev))
return rc_dma_get_range(dev, size);
return rc_dma_get_range(dev, limit);
else
return nc_dma_get_range(dev, size);
return nc_dma_get_range(dev, limit);
}
static void __init acpi_iort_register_irq(int hwirq, const char *name,
......
......@@ -1675,12 +1675,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
if (ret == -EPROBE_DEFER)
return -EPROBE_DEFER;
/*
* Historically this routine doesn't fail driver probing due to errors
* in acpi_iommu_configure_id().
*/
arch_setup_dma_ops(dev, 0, U64_MAX, attr == DEV_DMA_COHERENT);
arch_setup_dma_ops(dev, attr == DEV_DMA_COHERENT);
return 0;
}
......
......@@ -561,11 +561,7 @@ EXPORT_SYMBOL_GPL(hv_query_ext_cap);
void hv_setup_dma_ops(struct device *dev, bool coherent)
{
/*
* Hyper-V does not offer a vIOMMU in the guest
* VM, so pass 0/NULL for the IOMMU settings
*/
arch_setup_dma_ops(dev, 0, 0, coherent);
arch_setup_dma_ops(dev, coherent);
}
EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
......
......@@ -376,13 +376,17 @@ config ARM_SMMU_QCOM
config ARM_SMMU_QCOM_DEBUG
bool "ARM SMMU QCOM implementation defined debug support"
depends on ARM_SMMU_QCOM
depends on ARM_SMMU_QCOM=y
help
Support for implementation specific debug features in ARM SMMU
hardware found in QTI platforms.
hardware found in QTI platforms. This include support for
the Translation Buffer Units (TBU) that can be used to obtain
additional information when debugging memory management issues
like context faults.
Say Y here to enable debug for issues such as TLB sync timeouts
which requires implementation defined register dumps.
Say Y here to enable debug for issues such as context faults
or TLB sync timeouts which requires implementation defined
register dumps.
config ARM_SMMU_V3
tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
......@@ -397,9 +401,9 @@ config ARM_SMMU_V3
Say Y here if your system includes an IOMMU device implementing
the ARM SMMUv3 architecture.
if ARM_SMMU_V3
config ARM_SMMU_V3_SVA
bool "Shared Virtual Addressing support for the ARM SMMUv3"
depends on ARM_SMMU_V3
select IOMMU_SVA
select IOMMU_IOPF
select MMU_NOTIFIER
......@@ -410,6 +414,17 @@ config ARM_SMMU_V3_SVA
Say Y here if your system supports SVA extensions such as PCIe PASID
and PRI.
config ARM_SMMU_V3_KUNIT_TEST
tristate "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS
depends on KUNIT
depends on ARM_SMMU_V3_SVA
default KUNIT_ALL_TESTS
help
Enable this option to unit-test arm-smmu-v3 driver functions.
If unsure, say N.
endif
config S390_IOMMU
def_bool y if S390 && PCI
depends on S390 && PCI
......
......@@ -7,9 +7,12 @@ config AMD_IOMMU
select PCI_ATS
select PCI_PRI
select PCI_PASID
select MMU_NOTIFIER
select IOMMU_API
select IOMMU_IOVA
select IOMMU_IO_PGTABLE
select IOMMU_SVA
select IOMMU_IOPF
select IOMMUFD_DRIVER if IOMMUFD
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
......
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o
obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
......@@ -17,10 +17,16 @@ irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data);
irqreturn_t amd_iommu_int_thread_galog(int irq, void *data);
irqreturn_t amd_iommu_int_handler(int irq, void *data);
void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
u8 cntrl_intr, u8 cntrl_log,
u32 status_run_mask, u32 status_overflow_mask);
void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
void amd_iommu_restart_ppr_log(struct amd_iommu *iommu);
void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
void iommu_feature_enable(struct amd_iommu *iommu, u8 bit);
void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
gfp_t gfp, size_t size);
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
......@@ -33,22 +39,47 @@ int amd_iommu_prepare(void);
int amd_iommu_enable(void);
void amd_iommu_disable(void);
int amd_iommu_reenable(int mode);
int amd_iommu_enable_faulting(void);
int amd_iommu_enable_faulting(unsigned int cpu);
extern int amd_iommu_guest_ir;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
bool amd_iommu_v2_supported(void);
/* Device capabilities */
int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev);
void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev);
/* Protection domain ops */
struct protection_domain *protection_domain_alloc(unsigned int type);
void protection_domain_free(struct protection_domain *domain);
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);
void amd_iommu_domain_free(struct iommu_domain *dom);
int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain);
/* SVA/PASID */
bool amd_iommu_pasid_supported(void);
/* IOPF */
int amd_iommu_iopf_init(struct amd_iommu *iommu);
void amd_iommu_iopf_uninit(struct amd_iommu *iommu);
void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *resp);
int amd_iommu_iopf_add_device(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data);
void amd_iommu_iopf_remove_device(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data);
/* GCR3 setup */
int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data,
ioasid_t pasid, unsigned long gcr3);
int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid);
/* PPR */
int __init amd_iommu_alloc_ppr_log(struct amd_iommu *iommu);
void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu);
void amd_iommu_enable_ppr_log(struct amd_iommu *iommu);
void amd_iommu_poll_ppr_log(struct amd_iommu *iommu);
int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag);
/*
* This function flushes all internal caches of
* the IOMMU used by this driver.
......@@ -56,6 +87,7 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid);
void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
void amd_iommu_domain_update(struct protection_domain *domain);
void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set);
void amd_iommu_domain_flush_complete(struct protection_domain *domain);
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size);
......@@ -73,9 +105,6 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
}
#endif
int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag);
static inline bool is_rd890_iommu(struct pci_dev *pdev)
{
return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
......@@ -134,14 +163,6 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev)
return PCI_SEG_DEVID_TO_SBDF(seg, devid);
}
static inline void *alloc_pgtable_page(int nid, gfp_t gfp)
{
struct page *page;
page = alloc_pages_node(nid, gfp | __GFP_ZERO, 0);
return page ? page_address(page) : NULL;
}
/*
* This must be called after device probe completes. During probe
* use rlookup_amd_iommu() get the iommu.
......@@ -157,6 +178,11 @@ static inline struct amd_iommu *get_amd_iommu_from_dev_data(struct iommu_dev_dat
return iommu_get_iommu_dev(dev_data->dev, struct amd_iommu, iommu);
}
static inline struct protection_domain *to_pdomain(struct iommu_domain *dom)
{
return container_of(dom, struct protection_domain, domain);
}
bool translation_pre_enabled(struct amd_iommu *iommu);
bool amd_iommu_is_attach_deferred(struct device *dev);
int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line);
......
......@@ -8,7 +8,9 @@
#ifndef _ASM_X86_AMD_IOMMU_TYPES_H
#define _ASM_X86_AMD_IOMMU_TYPES_H
#include <linux/iommu.h>
#include <linux/types.h>
#include <linux/mmu_notifier.h>
#include <linux/mutex.h>
#include <linux/msi.h>
#include <linux/list.h>
......@@ -251,6 +253,14 @@
#define PPR_ENTRY_SIZE 16
#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES)
/* PAGE_SERVICE_REQUEST PPR Log Buffer Entry flags */
#define PPR_FLAG_EXEC 0x002 /* Execute permission requested */
#define PPR_FLAG_READ 0x004 /* Read permission requested */
#define PPR_FLAG_WRITE 0x020 /* Write permission requested */
#define PPR_FLAG_US 0x040 /* 1: User, 0: Supervisor */
#define PPR_FLAG_RVSD 0x080 /* Reserved bit not zero */
#define PPR_FLAG_GN 0x100 /* GVA and PASID is valid */
#define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL)
#define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL)
#define PPR_DEVID(x) ((x) & 0xffffULL)
......@@ -503,6 +513,11 @@ extern struct kmem_cache *amd_iommu_irq_cache;
list_for_each_entry((iommu), &amd_iommu_list, list)
#define for_each_iommu_safe(iommu, next) \
list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
/* Making iterating over protection_domain->dev_data_list easier */
#define for_each_pdom_dev_data(pdom_dev_data, pdom) \
list_for_each_entry(pdom_dev_data, &pdom->dev_data_list, list)
#define for_each_pdom_dev_data_safe(pdom_dev_data, next, pdom) \
list_for_each_entry_safe((pdom_dev_data), (next), &pdom->dev_data_list, list)
struct amd_iommu;
struct iommu_domain;
......@@ -544,6 +559,16 @@ enum protection_domain_mode {
PD_MODE_V2,
};
/* Track dev_data/PASID list for the protection domain */
struct pdom_dev_data {
/* Points to attached device data */
struct iommu_dev_data *dev_data;
/* PASID attached to the protection domain */
ioasid_t pasid;
/* For protection_domain->dev_data_list */
struct list_head list;
};
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
......@@ -560,6 +585,9 @@ struct protection_domain {
bool dirty_tracking; /* dirty tracking is enabled in the domain */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
struct mmu_notifier mn; /* mmu notifier for the SVA domain */
struct list_head dev_data_list; /* List of pdom_dev_data */
};
/*
......@@ -762,6 +790,10 @@ struct amd_iommu {
/* DebugFS Info */
struct dentry *debugfs;
#endif
/* IOPF support */
struct iopf_queue *iopf_queue;
unsigned char iopfq_name[32];
};
static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
......@@ -813,6 +845,7 @@ struct iommu_dev_data {
struct device *dev;
u16 devid; /* PCI Device ID */
u32 max_pasids; /* Max supported PASIDs */
u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */
int ats_qdep;
u8 ats_enabled :1; /* ATS state */
......
This diff is collapsed.
......@@ -22,6 +22,7 @@
#include "amd_iommu_types.h"
#include "amd_iommu.h"
#include "../iommu-pages.h"
static void v1_tlb_flush_all(void *cookie)
{
......@@ -156,7 +157,7 @@ static bool increase_address_space(struct protection_domain *domain,
bool ret = true;
u64 *pte;
pte = alloc_pgtable_page(domain->nid, gfp);
pte = iommu_alloc_page_node(domain->nid, gfp);
if (!pte)
return false;
......@@ -187,7 +188,7 @@ static bool increase_address_space(struct protection_domain *domain,
out:
spin_unlock_irqrestore(&domain->lock, flags);
free_page((unsigned long)pte);
iommu_free_page(pte);
return ret;
}
......@@ -250,7 +251,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
if (!IOMMU_PTE_PRESENT(__pte) ||
pte_level == PAGE_MODE_NONE) {
page = alloc_pgtable_page(domain->nid, gfp);
page = iommu_alloc_page_node(domain->nid, gfp);
if (!page)
return NULL;
......@@ -259,7 +260,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
/* pte could have been changed somewhere. */
if (!try_cmpxchg64(pte, &__pte, __npte))
free_page((unsigned long)page);
iommu_free_page(page);
else if (IOMMU_PTE_PRESENT(__pte))
*updated = true;
......@@ -431,7 +432,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
}
/* Everything flushed out, free pages now */
put_pages_list(&freelist);
iommu_put_pages_list(&freelist);
return ret;
}
......@@ -580,7 +581,7 @@ static void v1_free_pgtable(struct io_pgtable *iop)
/* Make changes visible to IOMMUs */
amd_iommu_domain_update(dom);
put_pages_list(&freelist);
iommu_put_pages_list(&freelist);
}
static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
......
......@@ -18,6 +18,7 @@
#include "amd_iommu_types.h"
#include "amd_iommu.h"
#include "../iommu-pages.h"
#define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */
#define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */
......@@ -99,11 +100,6 @@ static inline int page_size_to_level(u64 pg_size)
return PAGE_MODE_1_LEVEL;
}
static inline void free_pgtable_page(u64 *pt)
{
free_page((unsigned long)pt);
}
static void free_pgtable(u64 *pt, int level)
{
u64 *p;
......@@ -125,10 +121,10 @@ static void free_pgtable(u64 *pt, int level)
if (level > 2)
free_pgtable(p, level - 1);
else
free_pgtable_page(p);
iommu_free_page(p);
}
free_pgtable_page(pt);
iommu_free_page(pt);
}
/* Allocate page table */
......@@ -156,14 +152,14 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
}
if (!IOMMU_PTE_PRESENT(__pte)) {
page = alloc_pgtable_page(nid, gfp);
page = iommu_alloc_page_node(nid, gfp);
if (!page)
return NULL;
__npte = set_pgtable_attr(page);
/* pte could have been changed somewhere. */
if (cmpxchg64(pte, __pte, __npte) != __pte)
free_pgtable_page(page);
iommu_free_page(page);
else if (IOMMU_PTE_PRESENT(__pte))
*updated = true;
......@@ -185,7 +181,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
if (pg_size == IOMMU_PAGE_SIZE_1G)
free_pgtable(__pte, end_level - 1);
else if (pg_size == IOMMU_PAGE_SIZE_2M)
free_pgtable_page(__pte);
iommu_free_page(__pte);
}
return pte;
......@@ -366,7 +362,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
struct protection_domain *pdom = (struct protection_domain *)cookie;
int ias = IOMMU_IN_ADDR_BIT_SIZE;
pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC);
pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_ATOMIC);
if (!pgtable->pgd)
return NULL;
......
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2024 Advanced Micro Devices, Inc.
*/
#define pr_fmt(fmt) "AMD-Vi: " fmt
#define dev_fmt(fmt) pr_fmt(fmt)
#include <linux/iommu.h>
#include <linux/mm_types.h>
#include "amd_iommu.h"
static inline bool is_pasid_enabled(struct iommu_dev_data *dev_data)
{
if (dev_data->pasid_enabled && dev_data->max_pasids &&
dev_data->gcr3_info.gcr3_tbl != NULL)
return true;
return false;
}
static inline bool is_pasid_valid(struct iommu_dev_data *dev_data,
ioasid_t pasid)
{
if (pasid > 0 && pasid < dev_data->max_pasids)
return true;
return false;
}
static void remove_dev_pasid(struct pdom_dev_data *pdom_dev_data)
{
/* Update GCR3 table and flush IOTLB */
amd_iommu_clear_gcr3(pdom_dev_data->dev_data, pdom_dev_data->pasid);
list_del(&pdom_dev_data->list);
kfree(pdom_dev_data);
}
/* Clear PASID from device GCR3 table and remove pdom_dev_data from list */
static void remove_pdom_dev_pasid(struct protection_domain *pdom,
struct device *dev, ioasid_t pasid)
{
struct pdom_dev_data *pdom_dev_data;
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
lockdep_assert_held(&pdom->lock);
for_each_pdom_dev_data(pdom_dev_data, pdom) {
if (pdom_dev_data->dev_data == dev_data &&
pdom_dev_data->pasid == pasid) {
remove_dev_pasid(pdom_dev_data);
break;
}
}
}
static void sva_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct pdom_dev_data *pdom_dev_data;
struct protection_domain *sva_pdom;
unsigned long flags;
sva_pdom = container_of(mn, struct protection_domain, mn);
spin_lock_irqsave(&sva_pdom->lock, flags);
for_each_pdom_dev_data(pdom_dev_data, sva_pdom) {
amd_iommu_dev_flush_pasid_pages(pdom_dev_data->dev_data,
pdom_dev_data->pasid,
start, end - start);
}
spin_unlock_irqrestore(&sva_pdom->lock, flags);
}
static void sva_mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
struct pdom_dev_data *pdom_dev_data, *next;
struct protection_domain *sva_pdom;
unsigned long flags;
sva_pdom = container_of(mn, struct protection_domain, mn);
spin_lock_irqsave(&sva_pdom->lock, flags);
/* Assume dev_data_list contains same PASID with different devices */
for_each_pdom_dev_data_safe(pdom_dev_data, next, sva_pdom)
remove_dev_pasid(pdom_dev_data);
spin_unlock_irqrestore(&sva_pdom->lock, flags);
}
static const struct mmu_notifier_ops sva_mn = {
.arch_invalidate_secondary_tlbs = sva_arch_invalidate_secondary_tlbs,
.release = sva_mn_release,
};
int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
struct pdom_dev_data *pdom_dev_data;
struct protection_domain *sva_pdom = to_pdomain(domain);
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
unsigned long flags;
int ret = -EINVAL;
/* PASID zero is used for requests from the I/O device without PASID */
if (!is_pasid_valid(dev_data, pasid))
return ret;
/* Make sure PASID is enabled */
if (!is_pasid_enabled(dev_data))
return ret;
/* Add PASID to protection domain pasid list */
pdom_dev_data = kzalloc(sizeof(*pdom_dev_data), GFP_KERNEL);
if (pdom_dev_data == NULL)
return ret;
pdom_dev_data->pasid = pasid;
pdom_dev_data->dev_data = dev_data;
spin_lock_irqsave(&sva_pdom->lock, flags);
/* Setup GCR3 table */
ret = amd_iommu_set_gcr3(dev_data, pasid,
iommu_virt_to_phys(domain->mm->pgd));
if (ret) {
kfree(pdom_dev_data);
goto out_unlock;
}
list_add(&pdom_dev_data->list, &sva_pdom->dev_data_list);
out_unlock:
spin_unlock_irqrestore(&sva_pdom->lock, flags);
return ret;
}
void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain)
{
struct protection_domain *sva_pdom;
unsigned long flags;
if (!is_pasid_valid(dev_iommu_priv_get(dev), pasid))
return;
sva_pdom = to_pdomain(domain);
spin_lock_irqsave(&sva_pdom->lock, flags);
/* Remove PASID from dev_data_list */
remove_pdom_dev_pasid(sva_pdom, dev, pasid);
spin_unlock_irqrestore(&sva_pdom->lock, flags);
}
static void iommu_sva_domain_free(struct iommu_domain *domain)
{
struct protection_domain *sva_pdom = to_pdomain(domain);
if (sva_pdom->mn.ops)
mmu_notifier_unregister(&sva_pdom->mn, domain->mm);
amd_iommu_domain_free(domain);
}
static const struct iommu_domain_ops amd_sva_domain_ops = {
.set_dev_pasid = iommu_sva_set_dev_pasid,
.free = iommu_sva_domain_free
};
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm)
{
struct protection_domain *pdom;
int ret;
pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA);
if (!pdom)
return ERR_PTR(-ENOMEM);
pdom->domain.ops = &amd_sva_domain_ops;
pdom->mn.ops = &sva_mn;
ret = mmu_notifier_register(&pdom->mn, mm);
if (ret) {
protection_domain_free(pdom);
return ERR_PTR(ret);
}
return &pdom->domain;
}
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2023 Advanced Micro Devices, Inc.
*/
#define pr_fmt(fmt) "AMD-Vi: " fmt
#define dev_fmt(fmt) pr_fmt(fmt)
#include <linux/amd-iommu.h>
#include <linux/delay.h>
#include <linux/mmu_notifier.h>
#include <asm/iommu.h>
#include "amd_iommu.h"
#include "amd_iommu_types.h"
#include "../iommu-pages.h"
int __init amd_iommu_alloc_ppr_log(struct amd_iommu *iommu)
{
iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
PPR_LOG_SIZE);
return iommu->ppr_log ? 0 : -ENOMEM;
}
void amd_iommu_enable_ppr_log(struct amd_iommu *iommu)
{
u64 entry;
if (iommu->ppr_log == NULL)
return;
iommu_feature_enable(iommu, CONTROL_PPR_EN);
entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
&entry, sizeof(entry));
/* set head and tail to zero manually */
writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
}
void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu)
{
iommu_free_pages(iommu->ppr_log, get_order(PPR_LOG_SIZE));
}
/*
* This function restarts ppr logging in case the IOMMU experienced
* PPR log overflow.
*/
void amd_iommu_restart_ppr_log(struct amd_iommu *iommu)
{
amd_iommu_restart_log(iommu, "PPR", CONTROL_PPRINT_EN,
CONTROL_PPRLOG_EN, MMIO_STATUS_PPR_RUN_MASK,
MMIO_STATUS_PPR_OVERFLOW_MASK);
}
static inline u32 ppr_flag_to_fault_perm(u16 flag)
{
int perm = 0;
if (flag & PPR_FLAG_READ)
perm |= IOMMU_FAULT_PERM_READ;
if (flag & PPR_FLAG_WRITE)
perm |= IOMMU_FAULT_PERM_WRITE;
if (flag & PPR_FLAG_EXEC)
perm |= IOMMU_FAULT_PERM_EXEC;
if (!(flag & PPR_FLAG_US))
perm |= IOMMU_FAULT_PERM_PRIV;
return perm;
}
static bool ppr_is_valid(struct amd_iommu *iommu, u64 *raw)
{
struct device *dev = iommu->iommu.dev;
u16 devid = PPR_DEVID(raw[0]);
if (!(PPR_FLAGS(raw[0]) & PPR_FLAG_GN)) {
dev_dbg(dev, "PPR logged [Request ignored due to GN=0 (device=%04x:%02x:%02x.%x "
"pasid=0x%05llx address=0x%llx flags=0x%04llx tag=0x%03llx]\n",
iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
PPR_PASID(raw[0]), raw[1], PPR_FLAGS(raw[0]), PPR_TAG(raw[0]));
return false;
}
if (PPR_FLAGS(raw[0]) & PPR_FLAG_RVSD) {
dev_dbg(dev, "PPR logged [Invalid request format (device=%04x:%02x:%02x.%x "
"pasid=0x%05llx address=0x%llx flags=0x%04llx tag=0x%03llx]\n",
iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
PPR_PASID(raw[0]), raw[1], PPR_FLAGS(raw[0]), PPR_TAG(raw[0]));
return false;
}
return true;
}
static void iommu_call_iopf_notifier(struct amd_iommu *iommu, u64 *raw)
{
struct iommu_dev_data *dev_data;
struct iopf_fault event;
struct pci_dev *pdev;
u16 devid = PPR_DEVID(raw[0]);
if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
pr_info_ratelimited("Unknown PPR request received\n");
return;
}
pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
PCI_BUS_NUM(devid), devid & 0xff);
if (!pdev)
return;
if (!ppr_is_valid(iommu, raw))
goto out;
memset(&event, 0, sizeof(struct iopf_fault));
event.fault.type = IOMMU_FAULT_PAGE_REQ;
event.fault.prm.perm = ppr_flag_to_fault_perm(PPR_FLAGS(raw[0]));
event.fault.prm.addr = (u64)(raw[1] & PAGE_MASK);
event.fault.prm.pasid = PPR_PASID(raw[0]);
event.fault.prm.grpid = PPR_TAG(raw[0]) & 0x1FF;
/*
* PASID zero is used for requests from the I/O device without
* a PASID
*/
dev_data = dev_iommu_priv_get(&pdev->dev);
if (event.fault.prm.pasid == 0 ||
event.fault.prm.pasid >= dev_data->max_pasids) {
pr_info_ratelimited("Invalid PASID : 0x%x, device : 0x%x\n",
event.fault.prm.pasid, pdev->dev.id);
goto out;
}
event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
if (PPR_TAG(raw[0]) & 0x200)
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
/* Submit event */
iommu_report_device_fault(&pdev->dev, &event);
return;
out:
/* Nobody cared, abort */
amd_iommu_complete_ppr(&pdev->dev, PPR_PASID(raw[0]),
IOMMU_PAGE_RESP_FAILURE,
PPR_TAG(raw[0]) & 0x1FF);
}
void amd_iommu_poll_ppr_log(struct amd_iommu *iommu)
{
u32 head, tail;
if (iommu->ppr_log == NULL)
return;
head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
while (head != tail) {
volatile u64 *raw;
u64 entry[2];
int i;
raw = (u64 *)(iommu->ppr_log + head);
/*
* Hardware bug: Interrupt may arrive before the entry is
* written to memory. If this happens we need to wait for the
* entry to arrive.
*/
for (i = 0; i < LOOP_TIMEOUT; ++i) {
if (PPR_REQ_TYPE(raw[0]) != 0)
break;
udelay(1);
}
/* Avoid memcpy function-call overhead */
entry[0] = raw[0];
entry[1] = raw[1];
/*
* To detect the hardware errata 733 we need to clear the
* entry back to zero. This issue does not exist on SNP
* enabled system. Also this buffer is not writeable on
* SNP enabled system.
*/
if (!amd_iommu_snp_en)
raw[0] = raw[1] = 0UL;
/* Update head pointer of hardware ring-buffer */
head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
/* Handle PPR entry */
iommu_call_iopf_notifier(iommu, entry);
}
}
/**************************************************************
*
* IOPF handling stuff
*/
/* Setup per-IOMMU IOPF queue if not exist. */
int amd_iommu_iopf_init(struct amd_iommu *iommu)
{
int ret = 0;
if (iommu->iopf_queue)
return ret;
snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
"amdiommu-%#x-iopfq",
PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, iommu->devid));
iommu->iopf_queue = iopf_queue_alloc(iommu->iopfq_name);
if (!iommu->iopf_queue)
ret = -ENOMEM;
return ret;
}
/* Destroy per-IOMMU IOPF queue if no longer needed. */
void amd_iommu_iopf_uninit(struct amd_iommu *iommu)
{
iopf_queue_free(iommu->iopf_queue);
iommu->iopf_queue = NULL;
}
void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *resp)
{
amd_iommu_complete_ppr(dev, resp->pasid, resp->code, resp->grpid);
}
int amd_iommu_iopf_add_device(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data)
{
unsigned long flags;
int ret = 0;
if (!dev_data->pri_enabled)
return ret;
raw_spin_lock_irqsave(&iommu->lock, flags);
if (!iommu->iopf_queue) {
ret = -EINVAL;
goto out_unlock;
}
ret = iopf_queue_add_device(iommu->iopf_queue, dev_data->dev);
if (ret)
goto out_unlock;
dev_data->ppr = true;
out_unlock:
raw_spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
/* Its assumed that caller has verified that device was added to iopf queue */
void amd_iommu_iopf_remove_device(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data)
{
unsigned long flags;
raw_spin_lock_irqsave(&iommu->lock, flags);
iopf_queue_remove_device(iommu->iopf_queue, dev_data->dev);
dev_data->ppr = false;
raw_spin_unlock_irqrestore(&iommu->lock, flags);
}
......@@ -3,3 +3,5 @@ obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
arm_smmu_v3-objs-y += arm-smmu-v3.o
arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)
obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o
......@@ -8,6 +8,7 @@
#include <linux/mmu_notifier.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <kunit/visibility.h>
#include "arm-smmu-v3.h"
#include "../../io-pgtable-arm.h"
......@@ -34,21 +35,25 @@ struct arm_smmu_bond {
static DEFINE_MUTEX(sva_lock);
/*
* Write the CD to the CD tables for all masters that this domain is attached
* to. Note that this is only used to update existing CD entries in the target
* CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail.
*/
static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
int ssid,
struct arm_smmu_ctx_desc *cd)
static void
arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
{
struct arm_smmu_master *master;
struct arm_smmu_cd target_cd;
unsigned long flags;
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master, &smmu_domain->devices, domain_head) {
arm_smmu_write_ctx_desc(master, ssid, cd);
struct arm_smmu_cd *cdptr;
/* S1 domains only support RID attachment right now */
cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID);
if (WARN_ON(!cdptr))
continue;
arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
&target_cd);
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
}
......@@ -96,7 +101,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
* be some overlap between use of both ASIDs, until we invalidate the
* TLB.
*/
arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd);
arm_smmu_update_s1_domain_cd_entry(smmu_domain);
/* Invalidate TLB entries previously associated with that context */
arm_smmu_tlb_inv_asid(smmu, asid);
......@@ -105,11 +110,87 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
return NULL;
}
static u64 page_size_to_cd(void)
{
static_assert(PAGE_SIZE == SZ_4K || PAGE_SIZE == SZ_16K ||
PAGE_SIZE == SZ_64K);
if (PAGE_SIZE == SZ_64K)
return ARM_LPAE_TCR_TG0_64K;
if (PAGE_SIZE == SZ_16K)
return ARM_LPAE_TCR_TG0_16K;
return ARM_LPAE_TCR_TG0_4K;
}
VISIBLE_IF_KUNIT
void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
struct arm_smmu_master *master, struct mm_struct *mm,
u16 asid)
{
u64 par;
memset(target, 0, sizeof(*target));
par = cpuid_feature_extract_unsigned_field(
read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1),
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
target->data[0] = cpu_to_le64(
CTXDESC_CD_0_TCR_EPD1 |
#ifdef __BIG_ENDIAN
CTXDESC_CD_0_ENDI |
#endif
CTXDESC_CD_0_V |
FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par) |
CTXDESC_CD_0_AA64 |
(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
CTXDESC_CD_0_R |
CTXDESC_CD_0_A |
CTXDESC_CD_0_ASET |
FIELD_PREP(CTXDESC_CD_0_ASID, asid));
/*
* If no MM is passed then this creates a SVA entry that faults
* everything. arm_smmu_write_cd_entry() can hitlessly go between these
* two entries types since TTB0 is ignored by HW when EPD0 is set.
*/
if (mm) {
target->data[0] |= cpu_to_le64(
FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ,
64ULL - vabits_actual) |
FIELD_PREP(CTXDESC_CD_0_TCR_TG0, page_size_to_cd()) |
FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0,
ARM_LPAE_TCR_RGN_WBWA) |
FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0,
ARM_LPAE_TCR_RGN_WBWA) |
FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS));
target->data[1] = cpu_to_le64(virt_to_phys(mm->pgd) &
CTXDESC_CD_1_TTB0_MASK);
} else {
target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_EPD0);
/*
* Disable stall and immediately generate an abort if stall
* disable is permitted. This speeds up cleanup for an unclean
* exit if the device is still doing a lot of DMA.
*/
if (!(master->smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
target->data[0] &=
cpu_to_le64(~(CTXDESC_CD_0_S | CTXDESC_CD_0_R));
}
/*
* MAIR value is pretty much constant and global, so we can just get it
* from the current CPU register
*/
target->data[3] = cpu_to_le64(read_sysreg(mair_el1));
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd);
static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
{
u16 asid;
int err = 0;
u64 tcr, par, reg;
struct arm_smmu_ctx_desc *cd;
struct arm_smmu_ctx_desc *ret = NULL;
......@@ -143,39 +224,6 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
if (err)
goto out_free_asid;
tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - vabits_actual) |
FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) |
FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) |
FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) |
CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
switch (PAGE_SIZE) {
case SZ_4K:
tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_4K);
break;
case SZ_16K:
tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_16K);
break;
case SZ_64K:
tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_64K);
break;
default:
WARN_ON(1);
err = -EINVAL;
goto out_free_asid;
}
reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
par = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_PARANGE_SHIFT);
tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par);
cd->ttbr = virt_to_phys(mm->pgd);
cd->tcr = tcr;
/*
* MAIR value is pretty much constant and global, so we can just get it
* from the current CPU register
*/
cd->mair = read_sysreg(mair_el1);
cd->asid = asid;
cd->mm = mm;
......@@ -253,6 +301,8 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
struct arm_smmu_master *master;
unsigned long flags;
mutex_lock(&sva_lock);
if (smmu_mn->cleared) {
......@@ -264,8 +314,19 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
* DMA may still be running. Keep the cd valid to avoid C_BAD_CD events,
* but disable translation.
*/
arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm),
&quiet_cd);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master, &smmu_domain->devices, domain_head) {
struct arm_smmu_cd target;
struct arm_smmu_cd *cdptr;
cdptr = arm_smmu_get_cd_ptr(master, mm_get_enqcmd_pasid(mm));
if (WARN_ON(!cdptr))
continue;
arm_smmu_make_sva_cd(&target, master, NULL, smmu_mn->cd->asid);
arm_smmu_write_cd_entry(master, mm_get_enqcmd_pasid(mm), cdptr,
&target);
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0);
......@@ -360,6 +421,8 @@ static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid,
struct mm_struct *mm)
{
int ret;
struct arm_smmu_cd target;
struct arm_smmu_cd *cdptr;
struct arm_smmu_bond *bond;
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
......@@ -386,9 +449,13 @@ static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid,
goto err_free_bond;
}
ret = arm_smmu_write_ctx_desc(master, pasid, bond->smmu_mn->cd);
if (ret)
cdptr = arm_smmu_alloc_cd_ptr(master, mm_get_enqcmd_pasid(mm));
if (!cdptr) {
ret = -ENOMEM;
goto err_put_notifier;
}
arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid);
arm_smmu_write_cd_entry(master, pasid, cdptr, &target);
list_add(&bond->list, &master->bonds);
return 0;
......@@ -546,7 +613,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
mutex_lock(&sva_lock);
arm_smmu_write_ctx_desc(master, id, NULL);
arm_smmu_clear_cd(master, id);
list_for_each_entry(t, &master->bonds, list) {
if (t->mm == mm) {
......@@ -569,6 +636,9 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
int ret = 0;
struct mm_struct *mm = domain->mm;
if (mm_get_enqcmd_pasid(mm) != id)
return -EINVAL;
mutex_lock(&sva_lock);
ret = __arm_smmu_sva_bind(dev, id, mm);
mutex_unlock(&sva_lock);
......
This diff is collapsed.
This diff is collapsed.
......@@ -275,14 +275,18 @@ struct arm_smmu_ste {
* 2lvl: at most 1024 L1 entries,
* 1024 lazy entries per table.
*/
#define CTXDESC_SPLIT 10
#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT)
#define CTXDESC_L2_ENTRIES 1024
#define CTXDESC_L1_DESC_DWORDS 1
#define CTXDESC_L1_DESC_V (1UL << 0)
#define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12)
#define CTXDESC_CD_DWORDS 8
struct arm_smmu_cd {
__le64 data[CTXDESC_CD_DWORDS];
};
#define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
#define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
#define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
......@@ -583,16 +587,13 @@ struct arm_smmu_strtab_l1_desc {
struct arm_smmu_ctx_desc {
u16 asid;
u64 ttbr;
u64 tcr;
u64 mair;
refcount_t refs;
struct mm_struct *mm;
};
struct arm_smmu_l1_ctx_desc {
__le64 *l2ptr;
struct arm_smmu_cd *l2ptr;
dma_addr_t l2ptr_dma;
};
......@@ -604,8 +605,6 @@ struct arm_smmu_ctx_desc_cfg {
u8 s1fmt;
/* log2 of the maximum number of CDs supported by this table */
u8 s1cdmax;
/* Whether CD entries in this table have the stall bit set. */
u8 stall_enabled:1;
};
struct arm_smmu_s2_cfg {
......@@ -737,6 +736,36 @@ struct arm_smmu_domain {
struct list_head mmu_notifiers;
};
/* The following are exposed for testing purposes. */
struct arm_smmu_entry_writer_ops;
struct arm_smmu_entry_writer {
const struct arm_smmu_entry_writer_ops *ops;
struct arm_smmu_master *master;
};
struct arm_smmu_entry_writer_ops {
void (*get_used)(const __le64 *entry, __le64 *used);
void (*sync)(struct arm_smmu_entry_writer *writer);
};
#if IS_ENABLED(CONFIG_KUNIT)
void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits);
void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *cur,
const __le64 *target);
void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits);
void arm_smmu_make_abort_ste(struct arm_smmu_ste *target);
void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
struct arm_smmu_ste *target);
void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
struct arm_smmu_master *master);
void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain);
void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
struct arm_smmu_master *master, struct mm_struct *mm,
u16 asid);
#endif
static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
return container_of(dom, struct arm_smmu_domain, domain);
......@@ -744,10 +773,19 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
extern struct xarray arm_smmu_asid_xa;
extern struct mutex arm_smmu_asid_lock;
extern struct arm_smmu_ctx_desc quiet_cd;
int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
struct arm_smmu_ctx_desc *cd);
void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid);
struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
u32 ssid);
struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
u32 ssid);
void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain);
void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
struct arm_smmu_cd *cdptr,
const struct arm_smmu_cd *target);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
size_t granule, bool leaf,
......
......@@ -413,6 +413,10 @@ static const struct arm_smmu_impl qcom_smmu_500_impl = {
.reset = arm_mmu500_reset,
.write_s2cr = qcom_smmu_write_s2cr,
.tlb_sync = qcom_smmu_tlb_sync,
#ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
.context_fault = qcom_smmu_context_fault,
.context_fault_needs_threaded_irq = true,
#endif
};
static const struct arm_smmu_impl sdm845_smmu_500_impl = {
......@@ -422,6 +426,10 @@ static const struct arm_smmu_impl sdm845_smmu_500_impl = {
.reset = qcom_sdm845_smmu500_reset,
.write_s2cr = qcom_smmu_write_s2cr,
.tlb_sync = qcom_smmu_tlb_sync,
#ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
.context_fault = qcom_smmu_context_fault,
.context_fault_needs_threaded_irq = true,
#endif
};
static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = {
......
......@@ -30,6 +30,8 @@ struct qcom_smmu_match_data {
const struct arm_smmu_impl *adreno_impl;
};
irqreturn_t qcom_smmu_context_fault(int irq, void *dev);
#ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu);
#else
......
......@@ -806,8 +806,16 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
else
context_fault = arm_smmu_context_fault;
if (smmu->impl && smmu->impl->context_fault_needs_threaded_irq)
ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
context_fault,
IRQF_ONESHOT | IRQF_SHARED,
"arm-smmu-context-fault",
smmu_domain);
else
ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED,
"arm-smmu-context-fault", smmu_domain);
if (ret < 0) {
dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
cfg->irptndx, irq);
......@@ -859,14 +867,10 @@ static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain)
arm_smmu_rpm_put(smmu);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
{
struct arm_smmu_domain *smmu_domain;
if (type != IOMMU_DOMAIN_UNMANAGED) {
if (using_legacy_binding || type != IOMMU_DOMAIN_DMA)
return NULL;
}
/*
* Allocate the domain and initialise some of its data structures.
* We can't really do anything meaningful until we've added a
......@@ -1596,7 +1600,7 @@ static struct iommu_ops arm_smmu_ops = {
.identity_domain = &arm_smmu_identity_domain,
.blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
.domain_alloc = arm_smmu_domain_alloc,
.domain_alloc_paging = arm_smmu_domain_alloc_paging,
.probe_device = arm_smmu_probe_device,
.release_device = arm_smmu_release_device,
.probe_finalize = arm_smmu_probe_finalize,
......
......@@ -136,6 +136,7 @@ enum arm_smmu_cbar_type {
#define ARM_SMMU_CBAR_VMID GENMASK(7, 0)
#define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2))
#define ARM_SMMU_CBFRSYNRA_SID GENMASK(15, 0)
#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
#define ARM_SMMU_CBA2R_VMID16 GENMASK(31, 16)
......@@ -238,6 +239,7 @@ enum arm_smmu_cbar_type {
#define ARM_SMMU_CB_ATSR 0x8f0
#define ARM_SMMU_ATSR_ACTIVE BIT(0)
#define ARM_SMMU_RESUME_TERMINATE BIT(0)
/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS 128
......@@ -436,6 +438,7 @@ struct arm_smmu_impl {
int (*def_domain_type)(struct device *dev);
irqreturn_t (*global_fault)(int irq, void *dev);
irqreturn_t (*context_fault)(int irq, void *dev);
bool context_fault_needs_threaded_irq;
int (*alloc_context_bank)(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_device *smmu,
struct device *dev, int start);
......
......@@ -32,6 +32,7 @@
#include <trace/events/swiotlb.h>
#include "dma-iommu.h"
#include "iommu-pages.h"
struct iommu_dma_msi_page {
struct list_head list;
......@@ -156,7 +157,7 @@ static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq
if (fq->entries[idx].counter >= counter)
break;
put_pages_list(&fq->entries[idx].freelist);
iommu_put_pages_list(&fq->entries[idx].freelist);
free_iova_fast(&cookie->iovad,
fq->entries[idx].iova_pfn,
fq->entries[idx].pages);
......@@ -254,7 +255,7 @@ static void iommu_dma_free_fq_single(struct iova_fq *fq)
int idx;
fq_ring_for_each(idx, fq)
put_pages_list(&fq->entries[idx].freelist);
iommu_put_pages_list(&fq->entries[idx].freelist);
vfree(fq);
}
......@@ -267,7 +268,7 @@ static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq)
struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu);
fq_ring_for_each(idx, fq)
put_pages_list(&fq->entries[idx].freelist);
iommu_put_pages_list(&fq->entries[idx].freelist);
}
free_percpu(percpu_fq);
......@@ -660,19 +661,16 @@ static void iommu_dma_init_options(struct iommu_dma_options *options,
/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
* @base: IOVA at which the mappable address space starts
* @limit: Last address of the IOVA space
* @dev: Device the domain is being initialised for
*
* @base and @limit + 1 should be exact multiples of IOMMU page granularity to
* avoid rounding surprises. If necessary, we reserve the page at address 0
* If the geometry and dma_range_map include address 0, we reserve that page
* to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
* any change which could make prior IOVAs invalid will fail.
*/
static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
dma_addr_t limit, struct device *dev)
static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
const struct bus_dma_region *map = dev->dma_range_map;
unsigned long order, base_pfn;
struct iova_domain *iovad;
int ret;
......@@ -684,18 +682,18 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
/* Use the smallest supported page size for IOVA granularity */
order = __ffs(domain->pgsize_bitmap);
base_pfn = max_t(unsigned long, 1, base >> order);
base_pfn = 1;
/* Check the domain allows at least some access to the device... */
if (domain->geometry.force_aperture) {
if (map) {
dma_addr_t base = dma_range_map_min(map);
if (base > domain->geometry.aperture_end ||
limit < domain->geometry.aperture_start) {
dma_range_map_max(map) < domain->geometry.aperture_start) {
pr_warn("specified DMA range outside IOMMU capability\n");
return -EFAULT;
}
/* ...then finally give it a kicking to make sure it fits */
base_pfn = max_t(unsigned long, base_pfn,
domain->geometry.aperture_start >> order);
base_pfn = max(base, domain->geometry.aperture_start) >> order;
}
/* start_pfn is always nonzero for an already-initialised domain */
......@@ -1744,25 +1742,20 @@ static const struct dma_map_ops iommu_dma_ops = {
.max_mapping_size = iommu_dma_max_mapping_size,
};
/*
* The IOMMU core code allocates the default DMA domain, which the underlying
* IOMMU driver needs to support via the dma-iommu layer.
*/
void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
void iommu_setup_dma_ops(struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
if (!domain)
goto out_err;
if (dev_is_pci(dev))
dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
/*
* The IOMMU core code allocates the default DMA domain, which the
* underlying IOMMU driver needs to support via the dma-iommu layer.
*/
if (iommu_is_dma_domain(domain)) {
if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
if (iommu_dma_init_domain(domain, dev))
goto out_err;
dev->dma_ops = &iommu_dma_ops;
} else if (dev->dma_ops == &iommu_dma_ops) {
/* Clean up if we've switched *from* a DMA domain */
dev->dma_ops = NULL;
}
return;
......@@ -1770,7 +1763,6 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
dev_name(dev));
}
EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
phys_addr_t msi_addr, struct iommu_domain *domain)
......
......@@ -9,6 +9,8 @@
#ifdef CONFIG_IOMMU_DMA
void iommu_setup_dma_ops(struct device *dev);
int iommu_get_dma_cookie(struct iommu_domain *domain);
void iommu_put_dma_cookie(struct iommu_domain *domain);
......@@ -17,13 +19,13 @@ int iommu_dma_init_fq(struct iommu_domain *domain);
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
extern bool iommu_dma_forcedac;
static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
{
dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
}
#else /* CONFIG_IOMMU_DMA */
static inline void iommu_setup_dma_ops(struct device *dev)
{
}
static inline int iommu_dma_init_fq(struct iommu_domain *domain)
{
return -EINVAL;
......@@ -42,9 +44,5 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he
{
}
static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
{
}
#endif /* CONFIG_IOMMU_DMA */
#endif /* __DMA_IOMMU_H */
......@@ -22,6 +22,8 @@
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include "iommu-pages.h"
typedef u32 sysmmu_iova_t;
typedef u32 sysmmu_pte_t;
static struct iommu_domain exynos_identity_domain;
......@@ -900,11 +902,11 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev)
if (!domain)
return NULL;
domain->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2);
domain->pgtable = iommu_alloc_pages(GFP_KERNEL, 2);
if (!domain->pgtable)
goto err_pgtable;
domain->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
domain->lv2entcnt = iommu_alloc_pages(GFP_KERNEL, 1);
if (!domain->lv2entcnt)
goto err_counter;
......@@ -930,9 +932,9 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev)
return &domain->domain;
err_lv2ent:
free_pages((unsigned long)domain->lv2entcnt, 1);
iommu_free_pages(domain->lv2entcnt, 1);
err_counter:
free_pages((unsigned long)domain->pgtable, 2);
iommu_free_pages(domain->pgtable, 2);
err_pgtable:
kfree(domain);
return NULL;
......@@ -973,8 +975,8 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain)
phys_to_virt(base));
}
free_pages((unsigned long)domain->pgtable, 2);
free_pages((unsigned long)domain->lv2entcnt, 1);
iommu_free_pages(domain->pgtable, 2);
iommu_free_pages(domain->lv2entcnt, 1);
kfree(domain);
}
......
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
......
This diff is collapsed.
......@@ -706,7 +706,6 @@ static ssize_t dmar_perf_latency_write(struct file *filp,
dmar_latency_disable(iommu, DMAR_LATENCY_INV_IOTLB);
dmar_latency_disable(iommu, DMAR_LATENCY_INV_DEVTLB);
dmar_latency_disable(iommu, DMAR_LATENCY_INV_IEC);
dmar_latency_disable(iommu, DMAR_LATENCY_PRQ);
}
rcu_read_unlock();
break;
......@@ -728,12 +727,6 @@ static ssize_t dmar_perf_latency_write(struct file *filp,
dmar_latency_enable(iommu, DMAR_LATENCY_INV_IEC);
rcu_read_unlock();
break;
case 4:
rcu_read_lock();
for_each_active_iommu(iommu, drhd)
dmar_latency_enable(iommu, DMAR_LATENCY_PRQ);
rcu_read_unlock();
break;
default:
return -EINVAL;
}
......
......@@ -32,6 +32,7 @@
#include "iommu.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"
#include "perf.h"
#include "trace.h"
#include "perfmon.h"
......@@ -1067,7 +1068,6 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
goto error_free_seq_id;
}
err = -EINVAL;
if (!cap_sagaw(iommu->cap) &&
(!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
......@@ -1187,7 +1187,7 @@ static void free_iommu(struct intel_iommu *iommu)
}
if (iommu->qi) {
free_page((unsigned long)iommu->qi->desc);
iommu_free_page(iommu->qi->desc);
kfree(iommu->qi->desc_status);
kfree(iommu->qi);
}
......@@ -1755,7 +1755,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
int dmar_enable_qi(struct intel_iommu *iommu)
{
struct q_inval *qi;
struct page *desc_page;
void *desc;
int order;
if (!ecap_qis(iommu->ecap))
return -ENOENT;
......@@ -1776,19 +1777,19 @@ int dmar_enable_qi(struct intel_iommu *iommu)
* Need two pages to accommodate 256 descriptors of 256 bits each
* if the remapping hardware supports scalable mode translation.
*/
desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
!!ecap_smts(iommu->ecap));
if (!desc_page) {
order = ecap_smts(iommu->ecap) ? 1 : 0;
desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order);
if (!desc) {
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
}
qi->desc = page_address(desc_page);
qi->desc = desc;
qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
if (!qi->desc_status) {
free_page((unsigned long) qi->desc);
iommu_free_page(qi->desc);
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
......@@ -2122,7 +2123,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
return ret;
}
int __init enable_drhd_fault_handling(void)
int enable_drhd_fault_handling(unsigned int cpu)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
......@@ -2132,7 +2133,12 @@ int __init enable_drhd_fault_handling(void)
*/
for_each_iommu(iommu, drhd) {
u32 fault_status;
int ret = dmar_set_interrupt(iommu);
int ret;
if (iommu->irq || iommu->node != cpu_to_node(cpu))
continue;
ret = dmar_set_interrupt(iommu);
if (ret) {
pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
......
This diff is collapsed.
......@@ -35,6 +35,8 @@
#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
#define VTD_STRIDE_SHIFT (9)
#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
......@@ -455,7 +457,6 @@ enum {
/* Page group response descriptor QW0 */
#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4)
#define QI_PGRP_PDP(p) (((u64)(p)) << 5)
#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12)
#define QI_PGRP_DID(rid) (((u64)(rid)) << 16)
#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32)
......@@ -607,6 +608,9 @@ struct dmar_domain {
struct list_head devices; /* all devices' list */
struct list_head dev_pasids; /* all attached pasids */
spinlock_t cache_lock; /* Protect the cache tag list */
struct list_head cache_tags; /* Cache tag list */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
......@@ -644,6 +648,11 @@ struct dmar_domain {
/* link to parent domain siblings */
struct list_head s2_link;
};
/* SVA domain */
struct {
struct mmu_notifier notifier;
};
};
struct iommu_domain domain; /* generic domain data structure for
......@@ -1038,6 +1047,19 @@ static inline void context_set_sm_pre(struct context_entry *context)
context->lo |= BIT_ULL(4);
}
/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
{
host_addr &= ~PAGE_MASK;
return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
/* Return a size from number of VTD pages. */
static inline unsigned long nrpages_to_size(unsigned long npages)
{
return npages << VTD_PAGE_SHIFT;
}
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds) (((pds) & 0x7) << 9)
......@@ -1085,48 +1107,60 @@ void domain_update_iommu_cap(struct dmar_domain *domain);
int dmar_ir_support(void);
void *alloc_pgtable_page(int node, gfp_t gfp);
void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
const struct iommu_user_data *user_data);
struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid);
enum cache_tag_type {
CACHE_TAG_IOTLB,
CACHE_TAG_DEVTLB,
CACHE_TAG_NESTING_IOTLB,
CACHE_TAG_NESTING_DEVTLB,
};
struct cache_tag {
struct list_head node;
enum cache_tag_type type;
struct intel_iommu *iommu;
/*
* The @dev field represents the location of the cache. For IOTLB, it
* resides on the IOMMU hardware. @dev stores the device pointer to
* the IOMMU hardware. For DevTLB, it locates in the PCIe endpoint.
* @dev stores the device pointer to that endpoint.
*/
struct device *dev;
u16 domain_id;
ioasid_t pasid;
unsigned int users;
};
int cache_tag_assign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid);
void cache_tag_unassign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid);
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
unsigned long end, int ih);
void cache_tag_flush_all(struct dmar_domain *domain);
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
unsigned long end);
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
int intel_svm_enable_prq(struct intel_iommu *iommu);
int intel_svm_finish_prq(struct intel_iommu *iommu);
void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg);
struct iommu_domain *intel_svm_domain_alloc(void);
void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid);
struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
struct mm_struct *mm);
void intel_drain_pasid_prq(struct device *dev, u32 pasid);
struct intel_svm_dev {
struct list_head list;
struct rcu_head rcu;
struct device *dev;
struct intel_iommu *iommu;
u16 did;
u16 sid, qdep;
};
struct intel_svm {
struct mmu_notifier notifier;
struct mm_struct *mm;
u32 pasid;
struct list_head devs;
};
#else
static inline void intel_svm_check(struct intel_iommu *iommu) {}
static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {}
static inline struct iommu_domain *intel_svm_domain_alloc(void)
{
return NULL;
}
static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
static inline struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
struct mm_struct *mm)
{
return ERR_PTR(-ENODEV);
}
#endif
......
......@@ -23,6 +23,7 @@
#include "iommu.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"
#include "cap_audit.h"
enum irq_mode {
......@@ -529,7 +530,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
struct ir_table *ir_table;
struct fwnode_handle *fn;
unsigned long *bitmap;
struct page *pages;
void *ir_table_base;
if (iommu->ir_table)
return 0;
......@@ -538,9 +539,9 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
if (!ir_table)
return -ENOMEM;
pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
ir_table_base = iommu_alloc_pages_node(iommu->node, GFP_KERNEL,
INTR_REMAP_PAGE_ORDER);
if (!pages) {
if (!ir_table_base) {
pr_err("IR%d: failed to allocate pages of order %d\n",
iommu->seq_id, INTR_REMAP_PAGE_ORDER);
goto out_free_table;
......@@ -575,7 +576,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
else
iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops;
ir_table->base = page_address(pages);
ir_table->base = ir_table_base;
ir_table->bitmap = bitmap;
iommu->ir_table = ir_table;
......@@ -624,7 +625,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
out_free_bitmap:
bitmap_free(bitmap);
out_free_pages:
__free_pages(pages, INTR_REMAP_PAGE_ORDER);
iommu_free_pages(ir_table_base, INTR_REMAP_PAGE_ORDER);
out_free_table:
kfree(ir_table);
......@@ -645,8 +646,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
irq_domain_free_fwnode(fn);
iommu->ir_domain = NULL;
}
free_pages((unsigned long)iommu->ir_table->base,
INTR_REMAP_PAGE_ORDER);
iommu_free_pages(iommu->ir_table->base, INTR_REMAP_PAGE_ORDER);
bitmap_free(iommu->ir_table->bitmap);
kfree(iommu->ir_table);
iommu->ir_table = NULL;
......
This diff is collapsed.
......@@ -20,6 +20,7 @@
#include "iommu.h"
#include "pasid.h"
#include "../iommu-pages.h"
/*
* Intel IOMMU system wide PASID name space:
......@@ -38,7 +39,7 @@ int intel_pasid_alloc_table(struct device *dev)
{
struct device_domain_info *info;
struct pasid_table *pasid_table;
struct page *pages;
struct pasid_dir_entry *dir;
u32 max_pasid = 0;
int order, size;
......@@ -59,14 +60,13 @@ int intel_pasid_alloc_table(struct device *dev)
size = max_pasid >> (PASID_PDE_SHIFT - 3);
order = size ? get_order(size) : 0;
pages = alloc_pages_node(info->iommu->node,
GFP_KERNEL | __GFP_ZERO, order);
if (!pages) {
dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
if (!dir) {
kfree(pasid_table);
return -ENOMEM;
}
pasid_table->table = page_address(pages);
pasid_table->table = dir;
pasid_table->order = order;
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
......@@ -97,10 +97,10 @@ void intel_pasid_free_table(struct device *dev)
max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
for (i = 0; i < max_pde; i++) {
table = get_pasid_table_from_pde(&dir[i]);
free_pgtable_page(table);
iommu_free_page(table);
}
free_pages((unsigned long)pasid_table->table, pasid_table->order);
iommu_free_pages(pasid_table->table, pasid_table->order);
kfree(pasid_table);
}
......@@ -146,7 +146,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
retry:
entries = get_pasid_table_from_pde(&dir[dir_index]);
if (!entries) {
entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
if (!entries)
return NULL;
......@@ -158,7 +158,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
*/
if (cmpxchg64(&dir[dir_index].val, 0ULL,
(u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
free_pgtable_page(entries);
iommu_free_page(entries);
goto retry;
}
if (!ecap_coherent(info->iommu->ecap)) {
......
......@@ -11,7 +11,6 @@ enum latency_type {
DMAR_LATENCY_INV_IOTLB = 0,
DMAR_LATENCY_INV_DEVTLB,
DMAR_LATENCY_INV_IEC,
DMAR_LATENCY_PRQ,
DMAR_LATENCY_NUM
};
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -154,7 +154,10 @@ int __init irq_remap_enable_fault_handling(void)
if (!remap_ops->enable_faulting)
return -ENODEV;
return remap_ops->enable_faulting();
cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dmar:enable_fault_handling",
remap_ops->enable_faulting, NULL);
return remap_ops->enable_faulting(smp_processor_id());
}
void panic_if_irq_remap(const char *msg)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment