Commit d35ac6ac authored by Linus Torvalds

Merge tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu updates from Joerg Roedel:
 "Core changes:
   - iova_magazine_alloc() optimization
   - Make flush-queue an IOMMU driver capability
   - Consolidate the error handling around device attachment

  AMD IOMMU changes:
   - AVIC Interrupt Remapping Improvements
   - Some minor fixes and cleanups

  Intel VT-d changes from Lu Baolu:
   - Small and misc cleanups

  ARM-SMMU changes from Will Deacon:
   - Device-tree binding updates:
      - Add missing clocks for SC8280XP and SA8775 Adreno SMMUs
      - Add two new Qualcomm SMMUs in SDX75 and SM6375
   - Workarounds for Arm MMU-700 errata:
      - 1076982: Avoid use of SEV-based cmdq wakeup
      - 2812531: Terminate command batches with a CMD_SYNC
      - Enforce single-stage translation to avoid nesting-related errata
   - Set the correct level hint for range TLB invalidation on teardown

  .. and some other minor fixes and cleanups (including Freescale PAMU
  and virtio-iommu changes)"

* tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (50 commits)
  iommu/vt-d: Remove commented-out code
  iommu/vt-d: Remove two WARN_ON in domain_context_mapping_one()
  iommu/vt-d: Handle the failure case of dmar_reenable_qi()
  iommu/vt-d: Remove unnecessary (void*) conversions
  iommu/amd: Remove extern from function prototypes
  iommu/amd: Use BIT/BIT_ULL macro to define bit fields
  iommu/amd: Fix DTE_IRQ_PHYS_ADDR_MASK macro
  iommu/amd: Fix compile error for unused function
  iommu/amd: Improving Interrupt Remapping Table Invalidation
  iommu/amd: Do not Invalidate IRT when IRTE caching is disabled
  iommu/amd: Introduce Disable IRTE Caching Support
  iommu/amd: Remove the unused struct amd_ir_data.ref
  iommu/amd: Switch amd_iommu_update_ga() to use modify_irte_ga()
  iommu/arm-smmu-v3: Set TTL invalidation hint better
  iommu/arm-smmu-v3: Document nesting-related errata
  iommu/arm-smmu-v3: Add explicit feature for nesting
  iommu/arm-smmu-v3: Document MMU-700 erratum 2812531
  iommu/arm-smmu-v3: Work around MMU-600 erratum 1076982
  dt-bindings: arm-smmu: Add SDX75 SMMU compatible
  dt-bindings: arm-smmu: Add SM6375 GPU SMMU
  ...
parents 0b26eadb a7a33407
......@@ -254,6 +254,7 @@ ForEachMacros:
- 'for_each_free_mem_range'
- 'for_each_free_mem_range_reverse'
- 'for_each_func_rsrc'
- 'for_each_group_device'
- 'for_each_group_evsel'
- 'for_each_group_member'
- 'for_each_hstate'
......
......@@ -323,6 +323,7 @@
option with care.
pgtbl_v1 - Use v1 page table for DMA-API (Default).
pgtbl_v2 - Use v2 page table for DMA-API.
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
......
......@@ -140,6 +140,10 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-500 | #841119,826419 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-600 | #1076982,1209401| N/A |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-700 | #2268618,2812531| N/A |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
+----------------+-----------------+-----------------+-----------------------------+
......
......@@ -29,6 +29,7 @@ properties:
- qcom,msm8996-smmu-v2
- qcom,msm8998-smmu-v2
- qcom,sdm630-smmu-v2
- qcom,sm6375-smmu-v2
- const: qcom,smmu-v2
- description: Qcom SoCs implementing "qcom,smmu-500" and "arm,mmu-500"
......@@ -45,6 +46,7 @@ properties:
- qcom,sdm845-smmu-500
- qcom,sdx55-smmu-500
- qcom,sdx65-smmu-500
- qcom,sdx75-smmu-500
- qcom,sm6115-smmu-500
- qcom,sm6125-smmu-500
- qcom,sm6350-smmu-500
......@@ -79,7 +81,9 @@ properties:
- description: Qcom Adreno GPUs implementing "qcom,smmu-500" and "arm,mmu-500"
items:
- enum:
- qcom,sa8775p-smmu-500
- qcom,sc7280-smmu-500
- qcom,sc8280xp-smmu-500
- qcom,sm6115-smmu-500
- qcom,sm6125-smmu-500
- qcom,sm8150-smmu-500
......@@ -267,6 +271,7 @@ allOf:
enum:
- qcom,msm8998-smmu-v2
- qcom,sdm630-smmu-v2
- qcom,sm6375-smmu-v2
then:
anyOf:
- properties:
......@@ -331,7 +336,10 @@ allOf:
properties:
compatible:
contains:
const: qcom,sc7280-smmu-500
enum:
- qcom,sa8775p-smmu-500
- qcom,sc7280-smmu-500
- qcom,sc8280xp-smmu-500
then:
properties:
clock-names:
......@@ -413,10 +421,8 @@ allOf:
- nvidia,smmu-500
- qcom,qcm2290-smmu-500
- qcom,qdu1000-smmu-500
- qcom,sa8775p-smmu-500
- qcom,sc7180-smmu-500
- qcom,sc8180x-smmu-500
- qcom,sc8280xp-smmu-500
- qcom,sdm670-smmu-500
- qcom,sdm845-smmu-500
- qcom,sdx55-smmu-500
......
......@@ -1353,6 +1353,7 @@ static struct platform_driver fsl_pci_driver = {
.of_match_table = pci_ids,
},
.probe = fsl_pci_probe,
.driver_managed_dma = true,
};
static int __init fsl_pci_init(void)
......
......@@ -11,12 +11,15 @@
#include "amd_iommu_types.h"
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
extern void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
irqreturn_t amd_iommu_int_thread(int irq, void *data);
irqreturn_t amd_iommu_int_handler(int irq, void *data);
void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
int amd_iommu_init_devices(void);
void amd_iommu_uninit_devices(void);
void amd_iommu_init_notifier(void);
void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
......@@ -25,11 +28,11 @@ static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {}
#endif
/* Needed for interrupt remapping */
extern int amd_iommu_prepare(void);
extern int amd_iommu_enable(void);
extern void amd_iommu_disable(void);
extern int amd_iommu_reenable(int);
extern int amd_iommu_enable_faulting(void);
int amd_iommu_prepare(void);
int amd_iommu_enable(void);
void amd_iommu_disable(void);
int amd_iommu_reenable(int mode);
int amd_iommu_enable_faulting(void);
extern int amd_iommu_guest_ir;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
......@@ -37,33 +40,32 @@ extern int amd_iommu_gpt_level;
/* IOMMUv2 specific functions */
struct iommu_domain;
extern bool amd_iommu_v2_supported(void);
extern struct amd_iommu *get_amd_iommu(unsigned int idx);
extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value);
extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value);
extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
u64 address);
extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
extern void amd_iommu_domain_update(struct protection_domain *domain);
extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3);
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
bool amd_iommu_v2_supported(void);
struct amd_iommu *get_amd_iommu(unsigned int idx);
u8 amd_iommu_pc_get_max_banks(unsigned int idx);
bool amd_iommu_pc_supported(void);
u8 amd_iommu_pc_get_max_counters(unsigned int idx);
int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value);
int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value);
int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
void amd_iommu_domain_direct_map(struct iommu_domain *dom);
int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address);
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
void amd_iommu_domain_update(struct protection_domain *domain);
void amd_iommu_domain_flush_complete(struct protection_domain *domain);
void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3);
int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
#ifdef CONFIG_IRQ_REMAP
extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
#else
static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
{
......@@ -75,8 +77,8 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
#define PPR_INVALID 0x1
#define PPR_FAILURE 0xf
extern int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag);
int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag);
static inline bool is_rd890_iommu(struct pci_dev *pdev)
{
......@@ -129,10 +131,9 @@ static inline void *alloc_pgtable_page(int nid, gfp_t gfp)
return page ? page_address(page) : NULL;
}
extern bool translation_pre_enabled(struct amd_iommu *iommu);
extern bool amd_iommu_is_attach_deferred(struct device *dev);
extern int __init add_special_device(u8 type, u8 id, u32 *devid,
bool cmd_line);
bool translation_pre_enabled(struct amd_iommu *iommu);
bool amd_iommu_is_attach_deferred(struct device *dev);
int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line);
#ifdef CONFIG_DMI
void amd_iommu_apply_ivrs_quirks(void);
......@@ -140,9 +141,9 @@ void amd_iommu_apply_ivrs_quirks(void);
static inline void amd_iommu_apply_ivrs_quirks(void) { }
#endif
extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
u64 *root, int mode);
extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
u64 *root, int mode);
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
extern u64 amd_iommu_efr;
extern u64 amd_iommu_efr2;
......
......@@ -84,21 +84,21 @@
/* Extended Feature Bits */
#define FEATURE_PREFETCH (1ULL<<0)
#define FEATURE_PPR (1ULL<<1)
#define FEATURE_X2APIC (1ULL<<2)
#define FEATURE_NX (1ULL<<3)
#define FEATURE_GT (1ULL<<4)
#define FEATURE_IA (1ULL<<6)
#define FEATURE_GA (1ULL<<7)
#define FEATURE_HE (1ULL<<8)
#define FEATURE_PC (1ULL<<9)
#define FEATURE_PREFETCH BIT_ULL(0)
#define FEATURE_PPR BIT_ULL(1)
#define FEATURE_X2APIC BIT_ULL(2)
#define FEATURE_NX BIT_ULL(3)
#define FEATURE_GT BIT_ULL(4)
#define FEATURE_IA BIT_ULL(6)
#define FEATURE_GA BIT_ULL(7)
#define FEATURE_HE BIT_ULL(8)
#define FEATURE_PC BIT_ULL(9)
#define FEATURE_GATS_SHIFT (12)
#define FEATURE_GATS_MASK (3ULL)
#define FEATURE_GAM_VAPIC (1ULL<<21)
#define FEATURE_GIOSUP (1ULL<<48)
#define FEATURE_EPHSUP (1ULL<<50)
#define FEATURE_SNP (1ULL<<63)
#define FEATURE_GAM_VAPIC BIT_ULL(21)
#define FEATURE_GIOSUP BIT_ULL(48)
#define FEATURE_EPHSUP BIT_ULL(50)
#define FEATURE_SNP BIT_ULL(63)
#define FEATURE_PASID_SHIFT 32
#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
......@@ -120,13 +120,13 @@
#define PASID_MASK 0x0000ffff
/* MMIO status bits */
#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK (1 << 0)
#define MMIO_STATUS_EVT_INT_MASK (1 << 1)
#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
#define MMIO_STATUS_GALOG_RUN_MASK (1 << 8)
#define MMIO_STATUS_GALOG_OVERFLOW_MASK (1 << 9)
#define MMIO_STATUS_GALOG_INT_MASK (1 << 10)
#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK BIT(0)
#define MMIO_STATUS_EVT_INT_MASK BIT(1)
#define MMIO_STATUS_COM_WAIT_INT_MASK BIT(2)
#define MMIO_STATUS_PPR_INT_MASK BIT(6)
#define MMIO_STATUS_GALOG_RUN_MASK BIT(8)
#define MMIO_STATUS_GALOG_OVERFLOW_MASK BIT(9)
#define MMIO_STATUS_GALOG_INT_MASK BIT(10)
/* event logging constants */
#define EVENT_ENTRY_SIZE 0x10
......@@ -174,6 +174,7 @@
#define CONTROL_GAINT_EN 29
#define CONTROL_XT_EN 50
#define CONTROL_INTCAPXT_EN 51
#define CONTROL_IRTCACHEDIS 59
#define CONTROL_SNPAVIC_EN 61
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
......@@ -283,7 +284,7 @@
#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30))
/* Bit value definition for dte irq remapping fields*/
#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
#define DTE_IRQ_PHYS_ADDR_MASK GENMASK_ULL(51, 6)
#define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60)
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
#define DTE_IRQ_REMAP_ENABLE 1ULL
......@@ -369,23 +370,23 @@
/*
* Bit value definition for I/O PTE fields
*/
#define IOMMU_PTE_PR (1ULL << 0)
#define IOMMU_PTE_U (1ULL << 59)
#define IOMMU_PTE_FC (1ULL << 60)
#define IOMMU_PTE_IR (1ULL << 61)
#define IOMMU_PTE_IW (1ULL << 62)
#define IOMMU_PTE_PR BIT_ULL(0)
#define IOMMU_PTE_U BIT_ULL(59)
#define IOMMU_PTE_FC BIT_ULL(60)
#define IOMMU_PTE_IR BIT_ULL(61)
#define IOMMU_PTE_IW BIT_ULL(62)
/*
* Bit value definition for DTE fields
*/
#define DTE_FLAG_V (1ULL << 0)
#define DTE_FLAG_TV (1ULL << 1)
#define DTE_FLAG_IR (1ULL << 61)
#define DTE_FLAG_IW (1ULL << 62)
#define DTE_FLAG_IOTLB (1ULL << 32)
#define DTE_FLAG_GIOV (1ULL << 54)
#define DTE_FLAG_GV (1ULL << 55)
#define DTE_FLAG_V BIT_ULL(0)
#define DTE_FLAG_TV BIT_ULL(1)
#define DTE_FLAG_IR BIT_ULL(61)
#define DTE_FLAG_IW BIT_ULL(62)
#define DTE_FLAG_IOTLB BIT_ULL(32)
#define DTE_FLAG_GIOV BIT_ULL(54)
#define DTE_FLAG_GV BIT_ULL(55)
#define DTE_FLAG_MASK (0x3ffULL << 32)
#define DTE_GLX_SHIFT (56)
#define DTE_GLX_MASK (3)
......@@ -439,13 +440,13 @@
#define MAX_DOMAIN_ID 65536
/* Protection domain flags */
#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
#define PD_DMA_OPS_MASK BIT(0) /* domain used for dma_ops */
#define PD_DEFAULT_MASK BIT(1) /* domain is a default dma_ops
domain for an IOMMU */
#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
#define PD_PASSTHROUGH_MASK BIT(2) /* domain has no page
translation */
#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
#define PD_GIOV_MASK (1UL << 4) /* domain enable GIOV support */
#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */
#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
......@@ -716,6 +717,9 @@ struct amd_iommu {
/* if one, we need to send a completion wait command */
bool need_sync;
/* true if disable irte caching */
bool irtcachedis_enabled;
/* Handle for IOMMU core code */
struct iommu_device iommu;
......@@ -748,7 +752,7 @@ struct amd_iommu {
u32 flags;
volatile u64 *cmd_sem;
u64 cmd_sem_val;
atomic64_t cmd_sem_val;
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
/* DebugFS Info */
......@@ -882,7 +886,7 @@ extern int amd_iommu_max_glx_val;
* This function flushes all internal caches of
* the IOMMU used by this driver.
*/
extern void iommu_flush_all_caches(struct amd_iommu *iommu);
void iommu_flush_all_caches(struct amd_iommu *iommu);
static inline int get_ioapic_devid(int id)
{
......@@ -1006,7 +1010,6 @@ struct amd_ir_data {
struct irq_2_irte irq_2_irte;
struct msi_msg msi_entry;
void *entry; /* Pointer to union irte or struct irte_ga */
void *ref; /* Pointer to the actual irte */
/**
* Store information for activate/de-activate
......
......@@ -162,6 +162,7 @@ static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
static bool amd_iommu_detected;
static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
static bool amd_iommu_irtcachedis;
static int amd_iommu_target_ivhd_type;
/* Global EFR and EFR2 registers */
......@@ -484,6 +485,9 @@ static void iommu_disable(struct amd_iommu *iommu)
/* Disable IOMMU hardware itself */
iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
/* Clear IRTE cache disabling bit */
iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
}
/*
......@@ -1753,7 +1757,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
iommu->pci_seg = pci_seg;
raw_spin_lock_init(&iommu->lock);
iommu->cmd_sem_val = 0;
atomic64_set(&iommu->cmd_sem_val, 0);
/* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
......@@ -2710,6 +2714,33 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
#endif
}
/*
 * Clear the CONTROL_IRTCACHEDIS bit in the control register of @iommu by
 * way of iommu_feature_disable().
 */
static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
{
iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
}
/*
 * Try to turn on the "disable IRT caching" control bit for @iommu.
 *
 * Does nothing unless the "irtcachedis" option set amd_iommu_irtcachedis.
 * Otherwise the control bit is written and then read back; if it reads
 * back as set, iommu->irtcachedis_enabled is recorded as true. The
 * resulting IRT cache state is logged either way.
 */
static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
{
	const u64 irtcachedis_bit = 1ULL << CONTROL_IRTCACHEDIS;
	u64 ctrl;

	if (!amd_iommu_irtcachedis)
		return;

	/*
	 * Note:
	 * Support for the IRTCacheDis feature is determined by checking
	 * whether the bit is writable: set it, then read the register back.
	 */
	iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);

	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
	if (ctrl & irtcachedis_bit)
		iommu->irtcachedis_enabled = true;

	pr_info("iommu%d (%#06x) : IRT cache is %s\n",
		iommu->index, iommu->devid,
		iommu->irtcachedis_enabled ? "disabled" : "enabled");
}
static void early_enable_iommu(struct amd_iommu *iommu)
{
iommu_disable(iommu);
......@@ -2720,6 +2751,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
iommu_set_exclusion_range(iommu);
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
iommu_enable(iommu);
iommu_flush_all_caches(iommu);
}
......@@ -2770,10 +2802,12 @@ static void early_enable_iommus(void)
for_each_iommu(iommu) {
iommu_disable_command_buffer(iommu);
iommu_disable_event_buffer(iommu);
iommu_disable_irtcachedis(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
iommu_set_device_table(iommu);
iommu_flush_all_caches(iommu);
}
......@@ -3426,6 +3460,8 @@ static int __init parse_amd_iommu_options(char *str)
amd_iommu_pgtable = AMD_IOMMU_V1;
} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
amd_iommu_pgtable = AMD_IOMMU_V2;
} else if (strncmp(str, "irtcachedis", 11) == 0) {
amd_iommu_irtcachedis = true;
} else {
pr_notice("Unknown option - '%s'\n", str);
}
......
......@@ -310,8 +310,8 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
return NULL;
/* Large PTE */
if (PM_PTE_LEVEL(*pte) == 7 ||
PM_PTE_LEVEL(*pte) == 0)
if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL ||
PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE)
break;
/* No level skipping support yet */
......
......@@ -2,7 +2,7 @@
/*
* CPU-agnostic AMD IO page table v2 allocator.
*
* Copyright (C) 2022 Advanced Micro Devices, Inc.
* Copyright (C) 2022, 2023 Advanced Micro Devices, Inc.
* Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
* Author: Vasant Hegde <vasant.hegde@amd.com>
*/
......
......@@ -1182,11 +1182,11 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
if (!iommu->need_sync)
return 0;
raw_spin_lock_irqsave(&iommu->lock, flags);
data = ++iommu->cmd_sem_val;
data = atomic64_add_return(1, &iommu->cmd_sem_val);
build_completion_wait(&cmd, iommu, data);
raw_spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command_sync(iommu, &cmd, false);
if (ret)
goto out_unlock;
......@@ -1273,6 +1273,9 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
u32 devid;
u16 last_bdf = iommu->pci_seg->last_bdf;
if (iommu->irtcachedis_enabled)
return;
for (devid = 0; devid <= last_bdf; devid++)
iommu_flush_irt(iommu, devid);
......@@ -2313,6 +2316,8 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
return amdr_ivrs_remap_support;
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
return true;
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
default:
break;
}
......@@ -2822,6 +2827,32 @@ EXPORT_SYMBOL(amd_iommu_device_info);
static struct irq_chip amd_ir_chip;
static DEFINE_SPINLOCK(iommu_table_lock);
/*
 * Queue an IRT invalidation command for @devid followed by a
 * completion-wait command, then wait on the completion semaphore.
 *
 * Returns immediately when IRT caching is disabled on @iommu
 * (iommu->irtcachedis_enabled). Both commands are queued, and the wait is
 * performed, with iommu->lock held. A failure to queue either command
 * skips the wait; no error is reported to the caller.
 *
 * NOTE(review): the sequence value is taken from cmd_sem_val before
 * iommu->lock is acquired, so concurrent callers could queue their
 * completion-wait commands in a different order than their sequence
 * values - confirm wait_on_sem() tolerates that.
 */
static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
{
	struct iommu_cmd inv_cmd, wait_cmd;
	unsigned long flags;
	u64 seq;

	if (iommu->irtcachedis_enabled)
		return;

	build_inv_irt(&inv_cmd, devid);
	seq = atomic64_add_return(1, &iommu->cmd_sem_val);
	build_completion_wait(&wait_cmd, iommu, seq);

	raw_spin_lock_irqsave(&iommu->lock, flags);

	/* Only wait if both commands were queued successfully, in order. */
	if (!__iommu_queue_command_sync(iommu, &inv_cmd, true) &&
	    !__iommu_queue_command_sync(iommu, &wait_cmd, false))
		wait_on_sem(iommu, seq);

	raw_spin_unlock_irqrestore(&iommu->lock, flags);
}
static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
......@@ -3021,7 +3052,7 @@ static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count,
}
static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
struct irte_ga *irte, struct amd_ir_data *data)
struct irte_ga *irte)
{
struct irq_remap_table *table;
struct irte_ga *entry;
......@@ -3046,13 +3077,9 @@ static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
old = entry->irte;
WARN_ON(!try_cmpxchg128(&entry->irte, &old, irte->irte));
if (data)
data->ref = entry;
raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
iommu_flush_irt_and_complete(iommu, devid);
return 0;
}
......@@ -3071,8 +3098,7 @@ static int modify_irte(struct amd_iommu *iommu,
table->table[index] = irte->val;
raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
iommu_flush_irt_and_complete(iommu, devid);
return 0;
}
......@@ -3090,8 +3116,7 @@ static void free_irte(struct amd_iommu *iommu, u16 devid, int index)
iommu->irte_ops->clear_allocated(table, index);
raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
iommu_flush_irt_and_complete(iommu, devid);
}
static void irte_prepare(void *entry,
......@@ -3137,7 +3162,7 @@ static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u1
struct irte_ga *irte = (struct irte_ga *) entry;
irte->lo.fields_remap.valid = 1;
modify_irte_ga(iommu, devid, index, irte, NULL);
modify_irte_ga(iommu, devid, index, irte);
}
static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
......@@ -3153,7 +3178,7 @@ static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid,
struct irte_ga *irte = (struct irte_ga *) entry;
irte->lo.fields_remap.valid = 0;
modify_irte_ga(iommu, devid, index, irte, NULL);
modify_irte_ga(iommu, devid, index, irte);
}
static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index,
......@@ -3177,7 +3202,7 @@ static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid
APICID_TO_IRTE_DEST_LO(dest_apicid);
irte->hi.fields.destination =
APICID_TO_IRTE_DEST_HI(dest_apicid);
modify_irte_ga(iommu, devid, index, irte, NULL);
modify_irte_ga(iommu, devid, index, irte);
}
}
......@@ -3527,7 +3552,7 @@ int amd_iommu_activate_guest_mode(void *data)
entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
ir_data->irq_2_irte.index, entry, ir_data);
ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
......@@ -3557,7 +3582,7 @@ int amd_iommu_deactivate_guest_mode(void *data)
APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
ir_data->irq_2_irte.index, entry, ir_data);
ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
......@@ -3719,44 +3744,26 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
int amd_iommu_update_ga(int cpu, bool is_run, void *data)
{
unsigned long flags;
struct amd_iommu *iommu;
struct irq_remap_table *table;
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
int devid = ir_data->irq_2_irte.devid;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
struct irte_ga *ref = (struct irte_ga *) ir_data->ref;
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
!ref || !entry || !entry->lo.fields_vapic.guest_mode)
!entry || !entry->lo.fields_vapic.guest_mode)
return 0;
iommu = ir_data->iommu;
if (!iommu)
if (!ir_data->iommu)
return -ENODEV;
table = get_irq_table(iommu, devid);
if (!table)
return -ENODEV;
raw_spin_lock_irqsave(&table->lock, flags);
if (ref->lo.fields_vapic.guest_mode) {
if (cpu >= 0) {
ref->lo.fields_vapic.destination =
APICID_TO_IRTE_DEST_LO(cpu);
ref->hi.fields.destination =
APICID_TO_IRTE_DEST_HI(cpu);
}
ref->lo.fields_vapic.is_run = is_run;
barrier();
if (cpu >= 0) {
entry->lo.fields_vapic.destination =
APICID_TO_IRTE_DEST_LO(cpu);
entry->hi.fields.destination =
APICID_TO_IRTE_DEST_HI(cpu);
}
entry->lo.fields_vapic.is_run = is_run;
raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
return 0;
return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_update_ga);
#endif
......@@ -894,6 +894,12 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
{
int index;
if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
cmds->num = 0;
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
cmds->num = 0;
......@@ -1892,8 +1898,13 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
cmd->tlbi.tg = (tg - 10) / 2;
/* Determine what level the granule is at */
cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
/*
* Determine what level the granule is at. For non-leaf, io-pgtable
* assumes .tlb_flush_walk can invalidate multiple levels at once,
* so ignore the nominal last-level granule and leave TTL=0.
*/
if (cmd->tlbi.leaf)
cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
num_pages = size >> tg;
}
......@@ -2008,6 +2019,7 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
/* Assume that a coherent TCU implies coherent TBUs */
return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
case IOMMU_CAP_NOEXEC:
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
default:
return false;
......@@ -2023,7 +2035,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
if (type != IOMMU_DOMAIN_UNMANAGED &&
type != IOMMU_DOMAIN_DMA &&
type != IOMMU_DOMAIN_DMA_FQ &&
type != IOMMU_DOMAIN_IDENTITY)
return NULL;
......@@ -3429,6 +3440,44 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
return 0;
}
#define IIDR_IMPLEMENTER_ARM 0x43b
#define IIDR_PRODUCTID_ARM_MMU_600 0x483
#define IIDR_PRODUCTID_ARM_MMU_700 0x487
/*
 * Read the ARM_SMMU_IIDR register and adjust smmu->features and
 * smmu->options for known errata on Arm-implemented MMU-600 and MMU-700
 * parts. Any other implementer or product ID leaves @smmu untouched.
 */
static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
{
	u32 iidr = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
	unsigned int implementer = FIELD_GET(IIDR_IMPLEMENTER, iidr);
	unsigned int productid = FIELD_GET(IIDR_PRODUCTID, iidr);
	unsigned int variant = FIELD_GET(IIDR_VARIANT, iidr);
	unsigned int revision = FIELD_GET(IIDR_REVISION, iidr);

	if (implementer != IIDR_IMPLEMENTER_ARM)
		return;

	if (productid == IIDR_PRODUCTID_ARM_MMU_600) {
		/* Arm erratum 1076982 */
		if (variant == 0 && revision <= 2)
			smmu->features &= ~ARM_SMMU_FEAT_SEV;

		/* Arm erratum 1209401 */
		if (variant < 2)
			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
	} else if (productid == IIDR_PRODUCTID_ARM_MMU_700) {
		/* Arm erratum 2812531 */
		smmu->features &= ~ARM_SMMU_FEAT_BTM;
		smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;

		/* Arm errata 2268618, 2812531 */
		smmu->features &= ~ARM_SMMU_FEAT_NESTING;
	}
}
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
u32 reg;
......@@ -3635,6 +3684,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
smmu->ias = max(smmu->ias, smmu->oas);
if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
smmu->features |= ARM_SMMU_FEAT_NESTING;
arm_smmu_device_iidr_probe(smmu);
if (arm_smmu_sva_supported(smmu))
smmu->features |= ARM_SMMU_FEAT_SVA;
......
......@@ -69,6 +69,12 @@
#define IDR5_VAX GENMASK(11, 10)
#define IDR5_VAX_52_BIT 1
#define ARM_SMMU_IIDR 0x18
#define IIDR_PRODUCTID GENMASK(31, 20)
#define IIDR_VARIANT GENMASK(19, 16)
#define IIDR_REVISION GENMASK(15, 12)
#define IIDR_IMPLEMENTER GENMASK(11, 0)
#define ARM_SMMU_CR0 0x20
#define CR0_ATSCHK (1 << 4)
#define CR0_CMDQEN (1 << 3)
......@@ -639,11 +645,13 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_BTM (1 << 16)
#define ARM_SMMU_FEAT_SVA (1 << 17)
#define ARM_SMMU_FEAT_E2H (1 << 18)
#define ARM_SMMU_FEAT_NESTING (1 << 19)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
u32 options;
struct arm_smmu_cmdq cmdq;
......
......@@ -856,8 +856,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
struct arm_smmu_domain *smmu_domain;
if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
if (using_legacy_binding ||
(type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
if (using_legacy_binding || type != IOMMU_DOMAIN_DMA)
return NULL;
}
/*
......@@ -1325,6 +1324,7 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK ||
device_get_dma_attr(dev) == DEV_DMA_COHERENT;
case IOMMU_CAP_NOEXEC:
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
default:
return false;
......
......@@ -615,7 +615,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
goto done_unlock;
/* If the FQ fails we can simply fall back to strict mode */
if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
(!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
domain->type = IOMMU_DOMAIN_DMA;
ret = iova_reserve_iommu_regions(dev, domain);
......
......@@ -334,17 +334,6 @@ int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu)
return ret;
}
static struct iommu_group *get_device_iommu_group(struct device *dev)
{
struct iommu_group *group;
group = iommu_group_get(dev);
if (!group)
group = iommu_group_alloc();
return group;
}
static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl)
{
u32 version;
......@@ -356,94 +345,52 @@ static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl)
return version >= 0x204;
}
/* Get iommu group information from peer devices or devices on the parent bus */
static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev)
static struct iommu_group *fsl_pamu_device_group(struct device *dev)
{
struct pci_dev *tmp;
struct iommu_group *group;
struct pci_bus *bus = pdev->bus;
struct pci_dev *pdev;
/*
* Traverese the pci bus device list to get
* the shared iommu group.
* For platform devices we allocate a separate group for each of the
* devices.
*/
while (bus) {
list_for_each_entry(tmp, &bus->devices, bus_list) {
if (tmp == pdev)
continue;
group = iommu_group_get(&tmp->dev);
if (group)
return group;
}
if (!dev_is_pci(dev))
return generic_device_group(dev);
bus = bus->parent;
}
return NULL;
}
static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
{
struct pci_controller *pci_ctl;
bool pci_endpt_partitioning;
struct iommu_group *group = NULL;
pci_ctl = pci_bus_to_host(pdev->bus);
pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl);
/* We can partition PCIe devices so assign device group to the device */
if (pci_endpt_partitioning) {
group = pci_device_group(&pdev->dev);
/*
* PCIe controller is not a partitionable entity
* free the controller device iommu_group.
*/
if (pci_ctl->parent->iommu_group)
iommu_group_remove_device(pci_ctl->parent);
} else {
/*
* All devices connected to the controller will share the
* PCI controllers device group. If this is the first
* device to be probed for the pci controller, copy the
* device group information from the PCI controller device
* node and remove the PCI controller iommu group.
* For subsequent devices, the iommu group information can
* be obtained from sibling devices (i.e. from the bus_devices
* link list).
*/
if (pci_ctl->parent->iommu_group) {
group = get_device_iommu_group(pci_ctl->parent);
iommu_group_remove_device(pci_ctl->parent);
} else {
group = get_shared_pci_device_group(pdev);
}
}
if (!group)
group = ERR_PTR(-ENODEV);
/*
* We can partition PCIe devices so assign device group to the device
*/
pdev = to_pci_dev(dev);
if (check_pci_ctl_endpt_part(pci_bus_to_host(pdev->bus)))
return pci_device_group(&pdev->dev);
/*
* All devices connected to the controller will share the same device
* group.
*
* Due to ordering between fsl_pamu_init() and fsl_pci_init() it is
* guaranteed that the pci_ctl->parent platform_device will have the
* iommu driver bound and will already have a group set. So we just
* re-use this group as the group for every device in the hose.
*/
group = iommu_group_get(pci_bus_to_host(pdev->bus)->parent);
if (WARN_ON(!group))
return ERR_PTR(-EINVAL);
return group;
}
static struct iommu_group *fsl_pamu_device_group(struct device *dev)
static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
{
struct iommu_group *group = ERR_PTR(-ENODEV);
int len;
/*
* For platform devices we allocate a separate group for
* each of the devices.
* uboot must fill the fsl,liodn for platform devices to be supported by
* the iommu.
*/
if (dev_is_pci(dev))
group = get_pci_device_group(to_pci_dev(dev));
else if (of_get_property(dev->of_node, "fsl,liodn", &len))
group = get_device_iommu_group(dev);
return group;
}
if (!dev_is_pci(dev) &&
!of_get_property(dev->of_node, "fsl,liodn", &len))
return ERR_PTR(-ENODEV);
static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
{
return &pamu_iommu;
}
......
......@@ -1185,7 +1185,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC);
root = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
......@@ -1312,15 +1312,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
iommu->name, type);
return;
}
/* Note: set drain read/write */
#if 0
/*
* This is probably to be super secure.. Looks like we can
* ignore it without any impact.
*/
if (cap_read_drain(iommu->cap))
val |= DMA_TLB_READ_DRAIN;
#endif
if (cap_write_drain(iommu->cap))
val |= DMA_TLB_WRITE_DRAIN;
......@@ -1897,8 +1889,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
struct context_entry *context;
int ret;
WARN_ON(did == 0);
if (hw_pass_through && domain_type_is_si(domain))
translation = CONTEXT_TT_PASS_THROUGH;
......@@ -1944,8 +1934,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
if (sm_supported(iommu)) {
unsigned long pds;
WARN_ON(!table);
/* Setup the PASID DIR pointer: */
pds = context_get_sm_pds(table);
context->lo = (u64)virt_to_phys(table->table) |
......@@ -2967,10 +2955,15 @@ static int init_iommu_hw(void)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu = NULL;
int ret;
for_each_active_iommu(iommu, drhd)
if (iommu->qi)
dmar_reenable_qi(iommu);
for_each_active_iommu(iommu, drhd) {
if (iommu->qi) {
ret = dmar_reenable_qi(iommu);
if (ret)
return ret;
}
}
for_each_iommu(iommu, drhd) {
if (drhd->ignored) {
......@@ -4064,7 +4057,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
case IOMMU_DOMAIN_BLOCKED:
return &blocking_domain;
case IOMMU_DOMAIN_DMA:
case IOMMU_DOMAIN_DMA_FQ:
case IOMMU_DOMAIN_UNMANAGED:
dmar_domain = alloc_domain(type);
if (!dmar_domain) {
......@@ -4369,6 +4361,7 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
case IOMMU_CAP_PRE_BOOT_PROTECTION:
return dmar_platform_optin();
......
......@@ -68,6 +68,10 @@ struct group_device {
char *name;
};
/*
 * Iterate over each struct group_device in a struct iommu_group.
 *
 * @group: pointer to the struct iommu_group whose ->devices list is walked
 * @pos:   struct group_device * loop cursor; entries are linked through
 *         their ->list member
 *
 * This is a thin wrapper around list_for_each_entry() and is used as the
 * head of a loop statement in the same way.
 */
#define for_each_group_device(group, pos) \
list_for_each_entry(pos, &(group)->devices, list)
struct iommu_group_attribute {
struct attribute attr;
ssize_t (*show)(struct iommu_group *group, char *buf);
......@@ -89,17 +93,39 @@ static const char * const iommu_group_resv_type_string[] = {
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
static int iommu_alloc_default_domain(struct iommu_group *group,
struct device *dev);
static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group);
/*
 * Bit flags for the 'flags' argument of __iommu_device_set_domain() and
 * __iommu_group_set_domain_internal().
 */
enum {
/*
 * The caller has no way to recover from an attach failure: do not revert
 * to the previous domain, keep going over the remaining devices and, if a
 * blocking domain exists, try to attach it to the failing device instead.
 */
IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
};
static int __iommu_device_set_domain(struct iommu_group *group,
struct device *dev,
struct iommu_domain *new_domain,
unsigned int flags);
static int __iommu_group_set_domain_internal(struct iommu_group *group,
struct iommu_domain *new_domain,
unsigned int flags);
static int __iommu_group_set_domain(struct iommu_group *group,
struct iommu_domain *new_domain);
static int iommu_create_device_direct_mappings(struct iommu_group *group,
struct iommu_domain *new_domain)
{
return __iommu_group_set_domain_internal(group, new_domain, 0);
}
/*
 * Switch @group to @new_domain from a context that cannot report an error.
 *
 * The switch is requested with IOMMU_SET_DOMAIN_MUST_SUCCEED; if the
 * underlying call still returns an error, the only reaction is a WARN.
 */
static void __iommu_group_set_domain_nofail(struct iommu_group *group,
					     struct iommu_domain *new_domain)
{
	int err;

	err = __iommu_group_set_domain_internal(group, new_domain,
						IOMMU_SET_DOMAIN_MUST_SUCCEED);
	WARN_ON(err);
}
static int iommu_setup_default_domain(struct iommu_group *group,
int target_type);
static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
struct device *dev);
static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
......@@ -176,16 +202,16 @@ static int __init iommu_subsys_init(void)
if (!iommu_default_passthrough() && !iommu_dma_strict)
iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
pr_info("Default domain type: %s %s\n",
pr_info("Default domain type: %s%s\n",
iommu_domain_type_str(iommu_def_domain_type),
(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
"(set via kernel command line)" : "");
" (set via kernel command line)" : "");
if (!iommu_default_passthrough())
pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
iommu_dma_strict ? "strict" : "lazy",
(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
"(set via kernel command line)" : "");
" (set via kernel command line)" : "");
nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
if (!nb)
......@@ -343,6 +369,8 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
dev->iommu->iommu_dev = iommu_dev;
dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
if (ops->is_attach_deferred)
dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
group = iommu_group_get_for_dev(dev);
if (IS_ERR(group)) {
......@@ -377,30 +405,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
return ret;
}
static bool iommu_is_attach_deferred(struct device *dev)
{
const struct iommu_ops *ops = dev_iommu_ops(dev);
if (ops->is_attach_deferred)
return ops->is_attach_deferred(dev);
return false;
}
static int iommu_group_do_dma_first_attach(struct device *dev, void *data)
{
struct iommu_domain *domain = data;
lockdep_assert_held(&dev->iommu_group->mutex);
if (iommu_is_attach_deferred(dev)) {
dev->iommu->attach_deferred = 1;
return 0;
}
return __iommu_attach_device(domain, dev);
}
int iommu_probe_device(struct device *dev)
{
const struct iommu_ops *ops;
......@@ -417,29 +421,20 @@ int iommu_probe_device(struct device *dev)
goto err_release;
}
/*
* Try to allocate a default domain - needs support from the
* IOMMU driver. There are still some drivers which don't
* support default domains, so the return value is not yet
* checked.
*/
mutex_lock(&group->mutex);
iommu_alloc_default_domain(group, dev);
/*
* If device joined an existing group which has been claimed, don't
* attach the default domain.
*/
if (group->default_domain && !group->owner) {
ret = iommu_group_do_dma_first_attach(dev, group->default_domain);
if (ret) {
mutex_unlock(&group->mutex);
iommu_group_put(group);
goto err_release;
}
}
if (group->default_domain)
iommu_create_device_direct_mappings(group->default_domain, dev);
iommu_create_device_direct_mappings(group, dev);
if (group->domain) {
ret = __iommu_device_set_domain(group, dev, group->domain, 0);
if (ret)
goto err_unlock;
} else if (!group->default_domain) {
ret = iommu_setup_default_domain(group, 0);
if (ret)
goto err_unlock;
}
mutex_unlock(&group->mutex);
iommu_group_put(group);
......@@ -450,6 +445,9 @@ int iommu_probe_device(struct device *dev)
return 0;
err_unlock:
mutex_unlock(&group->mutex);
iommu_group_put(group);
err_release:
iommu_release_device(dev);
......@@ -468,7 +466,7 @@ __iommu_group_remove_device(struct iommu_group *group, struct device *dev)
struct group_device *device;
lockdep_assert_held(&group->mutex);
list_for_each_entry(device, &group->devices, list) {
for_each_group_device(group, device) {
if (device->dev == dev) {
list_del(&device->list);
return device;
......@@ -707,7 +705,7 @@ int iommu_get_group_resv_regions(struct iommu_group *group,
int ret = 0;
mutex_lock(&group->mutex);
list_for_each_entry(device, &group->devices, list) {
for_each_group_device(group, device) {
struct list_head dev_resv_regions;
/*
......@@ -953,16 +951,15 @@ int iommu_group_set_name(struct iommu_group *group, const char *name)
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);
static int iommu_create_device_direct_mappings(struct iommu_group *group,
static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
struct device *dev)
{
struct iommu_domain *domain = group->default_domain;
struct iommu_resv_region *entry;
struct list_head mappings;
unsigned long pg_size;
int ret = 0;
if (!domain || !iommu_is_dma_domain(domain))
if (!iommu_is_dma_domain(domain))
return 0;
BUG_ON(!domain->pgsize_bitmap);
......@@ -1069,25 +1066,13 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev)
mutex_lock(&group->mutex);
list_add_tail(&device->list, &group->devices);
if (group->domain)
ret = iommu_group_do_dma_first_attach(dev, group->domain);
mutex_unlock(&group->mutex);
if (ret)
goto err_put_group;
trace_add_device_to_group(group->id, dev);
dev_info(dev, "Adding to iommu group %d\n", group->id);
return 0;
err_put_group:
mutex_lock(&group->mutex);
list_del(&device->list);
mutex_unlock(&group->mutex);
dev->iommu_group = NULL;
kobject_put(group->devices_kobj);
sysfs_remove_link(group->devices_kobj, device->name);
err_free_name:
kfree(device->name);
err_remove_link:
......@@ -1125,31 +1110,6 @@ void iommu_group_remove_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);
static int iommu_group_device_count(struct iommu_group *group)
{
struct group_device *entry;
int ret = 0;
list_for_each_entry(entry, &group->devices, list)
ret++;
return ret;
}
static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
int (*fn)(struct device *, void *))
{
struct group_device *device;
int ret = 0;
list_for_each_entry(device, &group->devices, list) {
ret = fn(device->dev, data);
if (ret)
break;
}
return ret;
}
/**
* iommu_group_for_each_dev - iterate over each device in the group
* @group: the group
......@@ -1164,10 +1124,15 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
int (*fn)(struct device *, void *))
{
int ret;
struct group_device *device;
int ret = 0;
mutex_lock(&group->mutex);
ret = __iommu_group_for_each_dev(group, data, fn);
for_each_group_device(group, device) {
ret = fn(device->dev, data);
if (ret)
break;
}
mutex_unlock(&group->mutex);
return ret;
......@@ -1656,40 +1621,47 @@ static int iommu_get_def_domain_type(struct device *dev)
return 0;
}
static int iommu_group_alloc_default_domain(const struct bus_type *bus,
struct iommu_group *group,
unsigned int type)
static struct iommu_domain *
__iommu_group_alloc_default_domain(const struct bus_type *bus,
struct iommu_group *group, int req_type)
{
struct iommu_domain *dom;
dom = __iommu_domain_alloc(bus, type);
if (!dom && type != IOMMU_DOMAIN_DMA) {
dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
if (dom)
pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
type, group->name);
}
if (!dom)
return -ENOMEM;
group->default_domain = dom;
if (!group->domain)
group->domain = dom;
return 0;
if (group->default_domain && group->default_domain->type == req_type)
return group->default_domain;
return __iommu_domain_alloc(bus, req_type);
}
static int iommu_alloc_default_domain(struct iommu_group *group,
struct device *dev)
/*
* req_type of 0 means "auto" which means to select a domain based on
* iommu_def_domain_type or what the driver actually supports.
*/
static struct iommu_domain *
iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
unsigned int type;
const struct bus_type *bus =
list_first_entry(&group->devices, struct group_device, list)
->dev->bus;
struct iommu_domain *dom;
if (group->default_domain)
return 0;
lockdep_assert_held(&group->mutex);
if (req_type)
return __iommu_group_alloc_default_domain(bus, group, req_type);
type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
/* The driver gave no guidance on what type to use, try the default */
dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type);
if (dom)
return dom;
return iommu_group_alloc_default_domain(dev->bus, group, type);
/* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
return NULL;
dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA);
if (!dom)
return NULL;
pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
iommu_def_domain_type, group->name);
return dom;
}
/**
......@@ -1774,87 +1746,48 @@ static int iommu_bus_notifier(struct notifier_block *nb,
return 0;
}
struct __group_domain_type {
struct device *dev;
unsigned int type;
};
static int probe_get_default_domain_type(struct device *dev, void *data)
{
struct __group_domain_type *gtype = data;
unsigned int type = iommu_get_def_domain_type(dev);
if (type) {
if (gtype->type && gtype->type != type) {
dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
iommu_domain_type_str(type),
dev_name(gtype->dev),
iommu_domain_type_str(gtype->type));
gtype->type = 0;
}
if (!gtype->dev) {
gtype->dev = dev;
gtype->type = type;
}
}
return 0;
}
static void probe_alloc_default_domain(const struct bus_type *bus,
struct iommu_group *group)
/* A target_type of 0 will select the best domain type and cannot fail */
static int iommu_get_default_domain_type(struct iommu_group *group,
int target_type)
{
struct __group_domain_type gtype;
int best_type = target_type;
struct group_device *gdev;
struct device *last_dev;
memset(&gtype, 0, sizeof(gtype));
/* Ask for default domain requirements of all devices in the group */
__iommu_group_for_each_dev(group, &gtype,
probe_get_default_domain_type);
if (!gtype.type)
gtype.type = iommu_def_domain_type;
lockdep_assert_held(&group->mutex);
iommu_group_alloc_default_domain(bus, group, gtype.type);
for_each_group_device(group, gdev) {
unsigned int type = iommu_get_def_domain_type(gdev->dev);
}
if (best_type && type && best_type != type) {
if (target_type) {
dev_err_ratelimited(
gdev->dev,
"Device cannot be in %s domain\n",
iommu_domain_type_str(target_type));
return -1;
}
static int __iommu_group_dma_first_attach(struct iommu_group *group)
{
return __iommu_group_for_each_dev(group, group->default_domain,
iommu_group_do_dma_first_attach);
dev_warn(
gdev->dev,
"Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
iommu_domain_type_str(type), dev_name(last_dev),
iommu_domain_type_str(best_type));
return 0;
}
if (!best_type)
best_type = type;
last_dev = gdev->dev;
}
return best_type;
}
static int iommu_group_do_probe_finalize(struct device *dev, void *data)
static void iommu_group_do_probe_finalize(struct device *dev)
{
const struct iommu_ops *ops = dev_iommu_ops(dev);
if (ops->probe_finalize)
ops->probe_finalize(dev);
return 0;
}
static void __iommu_group_dma_finalize(struct iommu_group *group)
{
__iommu_group_for_each_dev(group, group->default_domain,
iommu_group_do_probe_finalize);
}
static int iommu_do_create_direct_mappings(struct device *dev, void *data)
{
struct iommu_group *group = data;
iommu_create_device_direct_mappings(group, dev);
return 0;
}
static int iommu_group_create_direct_mappings(struct iommu_group *group)
{
return __iommu_group_for_each_dev(group, group,
iommu_do_create_direct_mappings);
}
int bus_iommu_probe(const struct bus_type *bus)
......@@ -1873,32 +1806,31 @@ int bus_iommu_probe(const struct bus_type *bus)
return ret;
list_for_each_entry_safe(group, next, &group_list, entry) {
struct group_device *gdev;
mutex_lock(&group->mutex);
/* Remove item from the list */
list_del_init(&group->entry);
/* Try to allocate default domain */
probe_alloc_default_domain(bus, group);
if (!group->default_domain) {
ret = iommu_setup_default_domain(group, 0);
if (ret) {
mutex_unlock(&group->mutex);
continue;
return ret;
}
iommu_group_create_direct_mappings(group);
ret = __iommu_group_dma_first_attach(group);
mutex_unlock(&group->mutex);
if (ret)
break;
__iommu_group_dma_finalize(group);
/*
* FIXME: Mis-locked because the ops->probe_finalize() call-back
* of some IOMMU drivers calls arm_iommu_attach_device() which
* in-turn might call back into IOMMU core code, where it tries
* to take group->mutex, resulting in a deadlock.
*/
for_each_group_device(group, gdev)
iommu_group_do_probe_finalize(gdev->dev);
}
return ret;
return 0;
}
bool iommu_present(const struct bus_type *bus)
......@@ -1946,7 +1878,7 @@ bool iommu_group_has_isolated_msi(struct iommu_group *group)
bool ret = true;
mutex_lock(&group->mutex);
list_for_each_entry(group_dev, &group->devices, list)
for_each_group_device(group, group_dev)
ret &= msi_device_has_isolated_msi(group_dev->dev);
mutex_unlock(&group->mutex);
return ret;
......@@ -1980,11 +1912,12 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
unsigned type)
{
struct iommu_domain *domain;
unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;
if (bus == NULL || bus->iommu_ops == NULL)
return NULL;
domain = bus->iommu_ops->domain_alloc(type);
domain = bus->iommu_ops->domain_alloc(alloc_type);
if (!domain)
return NULL;
......@@ -2028,15 +1961,13 @@ EXPORT_SYMBOL_GPL(iommu_domain_free);
static void __iommu_group_set_core_domain(struct iommu_group *group)
{
struct iommu_domain *new_domain;
int ret;
if (group->owner)
new_domain = group->blocking_domain;
else
new_domain = group->default_domain;
ret = __iommu_group_set_domain(group, new_domain);
WARN(ret, "iommu driver failed to attach the default/blocking domain");
__iommu_group_set_domain_nofail(group, new_domain);
}
static int __iommu_attach_device(struct iommu_domain *domain,
......@@ -2082,7 +2013,7 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
*/
mutex_lock(&group->mutex);
ret = -EINVAL;
if (iommu_group_device_count(group) != 1)
if (list_count_nodes(&group->devices) != 1)
goto out_unlock;
ret = __iommu_attach_group(domain, group);
......@@ -2113,7 +2044,7 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
mutex_lock(&group->mutex);
if (WARN_ON(domain != group->domain) ||
WARN_ON(iommu_group_device_count(group) != 1))
WARN_ON(list_count_nodes(&group->devices) != 1))
goto out_unlock;
__iommu_group_set_core_domain(group);
......@@ -2149,52 +2080,14 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev)
return dev->iommu_group->default_domain;
}
/*
* IOMMU groups are really the natural working unit of the IOMMU, but
* the IOMMU API works on domains and devices. Bridge that gap by
* iterating over the devices in a group. Ideally we'd have a single
* device which represents the requestor ID of the group, but we also
* allow IOMMU drivers to create policy defined minimum sets, where
* the physical hardware may be able to distinguish members, but we
* wish to group them at a higher level (ex. untrusted multi-function
* PCI devices). Thus we attach each device.
*/
static int iommu_group_do_attach_device(struct device *dev, void *data)
{
struct iommu_domain *domain = data;
return __iommu_attach_device(domain, dev);
}
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group)
{
int ret;
if (group->domain && group->domain != group->default_domain &&
group->domain != group->blocking_domain)
return -EBUSY;
ret = __iommu_group_for_each_dev(group, domain,
iommu_group_do_attach_device);
if (ret == 0) {
group->domain = domain;
} else {
/*
* To recover from the case when certain device within the
* group fails to attach to the new domain, we need force
* attaching all devices back to the old domain. The old
* domain is compatible for all devices in the group,
* hence the iommu driver should always return success.
*/
struct iommu_domain *old_domain = group->domain;
group->domain = NULL;
WARN(__iommu_group_set_domain(group, old_domain),
"iommu driver failed to attach a compatible domain");
}
return ret;
return __iommu_group_set_domain(group, domain);
}
/**
......@@ -2221,21 +2114,61 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_attach_group);
static int iommu_group_do_set_platform_dma(struct device *dev, void *data)
static int __iommu_device_set_domain(struct iommu_group *group,
struct device *dev,
struct iommu_domain *new_domain,
unsigned int flags)
{
const struct iommu_ops *ops = dev_iommu_ops(dev);
int ret;
if (!WARN_ON(!ops->set_platform_dma_ops))
ops->set_platform_dma_ops(dev);
if (dev->iommu->attach_deferred) {
if (new_domain == group->default_domain)
return 0;
dev->iommu->attach_deferred = 0;
}
ret = __iommu_attach_device(new_domain, dev);
if (ret) {
/*
* If we have a blocking domain then try to attach that in hopes
* of avoiding a UAF. Modern drivers should implement blocking
* domains as global statics that cannot fail.
*/
if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
group->blocking_domain &&
group->blocking_domain != new_domain)
__iommu_attach_device(group->blocking_domain, dev);
return ret;
}
return 0;
}
static int __iommu_group_set_domain(struct iommu_group *group,
struct iommu_domain *new_domain)
/*
* If 0 is returned the group's domain is new_domain. If an error is returned
* then the group's domain will be set back to the existing domain, unless
* IOMMU_SET_DOMAIN_MUST_SUCCEED is set, in which case an error is still
* returned but the group's domain is left inconsistent. It is a driver bug to
* fail attach with a previously good domain. We try to avoid a kernel UAF
* because of this.
*
* IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
* API works on domains and devices. Bridge that gap by iterating over the
* devices in a group. Ideally we'd have a single device which represents the
* requestor ID of the group, but we also allow IOMMU drivers to create policy
* defined minimum sets, where the physical hardware may be able to distinguish
* members, but we wish to group them at a higher level (ex. untrusted
* multi-function PCI devices). Thus we attach each device.
*/
static int __iommu_group_set_domain_internal(struct iommu_group *group,
struct iommu_domain *new_domain,
unsigned int flags)
{
struct group_device *last_gdev;
struct group_device *gdev;
int result;
int ret;
lockdep_assert_held(&group->mutex);
if (group->domain == new_domain)
return 0;
......@@ -2245,8 +2178,12 @@ static int __iommu_group_set_domain(struct iommu_group *group,
* platform specific behavior.
*/
if (!new_domain) {
__iommu_group_for_each_dev(group, NULL,
iommu_group_do_set_platform_dma);
for_each_group_device(group, gdev) {
const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
if (!WARN_ON(!ops->set_platform_dma_ops))
ops->set_platform_dma_ops(gdev->dev);
}
group->domain = NULL;
return 0;
}
......@@ -2256,16 +2193,52 @@ static int __iommu_group_set_domain(struct iommu_group *group,
* domain. This switch does not have to be atomic and DMA can be
* discarded during the transition. DMA must only be able to access
* either new_domain or group->domain, never something else.
*
* Note that this is called in error unwind paths, attaching to a
* domain that has already been attached cannot fail.
*/
ret = __iommu_group_for_each_dev(group, new_domain,
iommu_group_do_attach_device);
if (ret)
return ret;
result = 0;
for_each_group_device(group, gdev) {
ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
flags);
if (ret) {
result = ret;
/*
* Keep trying the other devices in the group. If a
* driver fails attach to an otherwise good domain, and
* does not support blocking domains, it should at least
* drop its reference on the current domain so we don't
* UAF.
*/
if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
continue;
goto err_revert;
}
}
group->domain = new_domain;
return 0;
return result;
err_revert:
/*
* This is called in error unwind paths. A well behaved driver should
* always allow us to attach to a domain that was already attached.
*/
last_gdev = gdev;
for_each_group_device(group, gdev) {
const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
/*
* If set_platform_dma_ops is not present a NULL domain can
* happen only for first probe, in which case we leave
* group->domain as NULL and let release clean everything up.
*/
if (group->domain)
WARN_ON(__iommu_device_set_domain(
group, gdev->dev, group->domain,
IOMMU_SET_DOMAIN_MUST_SUCCEED));
else if (ops->set_platform_dma_ops)
ops->set_platform_dma_ops(gdev->dev);
if (gdev == last_gdev)
break;
}
return ret;
}
void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
......@@ -2846,78 +2819,112 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
}
EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
/*
* Changes the default domain of an iommu group
*
* @group: The group for which the default domain should be changed
* @dev: The first device in the group
* @type: The type of the new default domain that gets associated with the group
*
* Returns 0 on success and error code on failure
/**
* iommu_setup_default_domain - Set the default_domain for the group
* @group: Group to change
* @target_type: Domain type to set as the default_domain
*
* Note:
* 1. Presently, this function is called only when user requests to change the
* group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type
* Please take a closer look if intended to use for other purposes.
* Allocate a default domain and set it as the current domain on the group. If
* the group already has a default domain it will be changed to the target_type.
* When target_type is 0 the default domain is selected based on driver and
* system preferences.
*/
static int iommu_change_dev_def_domain(struct iommu_group *group,
struct device *dev, int type)
static int iommu_setup_default_domain(struct iommu_group *group,
int target_type)
{
struct __group_domain_type gtype = {NULL, 0};
struct iommu_domain *prev_dom;
struct iommu_domain *old_dom = group->default_domain;
struct group_device *gdev;
struct iommu_domain *dom;
bool direct_failed;
int req_type;
int ret;
lockdep_assert_held(&group->mutex);
prev_dom = group->default_domain;
__iommu_group_for_each_dev(group, &gtype,
probe_get_default_domain_type);
if (!type) {
/*
* If the user hasn't requested any specific type of domain and
* if the device supports both the domains, then default to the
* domain the device was booted with
*/
type = gtype.type ? : iommu_def_domain_type;
} else if (gtype.type && type != gtype.type) {
dev_err_ratelimited(dev, "Device cannot be in %s domain\n",
iommu_domain_type_str(type));
req_type = iommu_get_default_domain_type(group, target_type);
if (req_type < 0)
return -EINVAL;
}
/*
* Switch to a new domain only if the requested domain type is different
* from the existing default domain type
* There are still some drivers which don't support default domains, so
* we ignore the failure and leave group->default_domain NULL.
*
* We assume that the iommu driver starts up the device in
* 'set_platform_dma_ops' mode if it does not support default domains.
*/
if (prev_dom->type == type)
dom = iommu_group_alloc_default_domain(group, req_type);
if (!dom) {
/* Once in default_domain mode we never leave */
if (group->default_domain)
return -ENODEV;
group->default_domain = NULL;
return 0;
}
group->default_domain = NULL;
group->domain = NULL;
/* Sets group->default_domain to the newly allocated domain */
ret = iommu_group_alloc_default_domain(dev->bus, group, type);
if (ret)
goto restore_old_domain;
ret = iommu_group_create_direct_mappings(group);
if (ret)
goto free_new_domain;
ret = __iommu_attach_group(group->default_domain, group);
if (ret)
goto free_new_domain;
if (group->default_domain == dom)
return 0;
iommu_domain_free(prev_dom);
/*
* IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be
* mapped before their device is attached, in order to guarantee
* continuity with any FW activity
*/
direct_failed = false;
for_each_group_device(group, gdev) {
if (iommu_create_device_direct_mappings(dom, gdev->dev)) {
direct_failed = true;
dev_warn_once(
gdev->dev->iommu->iommu_dev->dev,
"IOMMU driver was not able to establish FW requested direct mapping.");
}
}
return 0;
/* We must set default_domain early for __iommu_device_set_domain */
group->default_domain = dom;
if (!group->domain) {
/*
* Drivers are not allowed to fail the first domain attach.
* The only way to recover from this is to fail attaching the
* iommu driver and call ops->release_device. Put the domain
* in group->default_domain so it is freed after.
*/
ret = __iommu_group_set_domain_internal(
group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
if (WARN_ON(ret))
goto out_free;
} else {
ret = __iommu_group_set_domain(group, dom);
if (ret) {
iommu_domain_free(dom);
group->default_domain = old_dom;
return ret;
}
}
free_new_domain:
iommu_domain_free(group->default_domain);
restore_old_domain:
group->default_domain = prev_dom;
group->domain = prev_dom;
/*
* Drivers are supposed to allow mappings to be installed in a domain
* before device attachment, but some don't. Hack around this defect by
* trying again after attaching. If this happens it means the device
* will not continuously have the IOMMU_RESV_DIRECT map.
*/
if (direct_failed) {
for_each_group_device(group, gdev) {
ret = iommu_create_device_direct_mappings(dom, gdev->dev);
if (ret)
goto err_restore;
}
}
err_restore:
if (old_dom) {
__iommu_group_set_domain_internal(
group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
iommu_domain_free(dom);
old_dom = NULL;
}
out_free:
if (old_dom)
iommu_domain_free(old_dom);
return ret;
}
......@@ -2933,8 +2940,7 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
static ssize_t iommu_group_store_type(struct iommu_group *group,
const char *buf, size_t count)
{
struct group_device *grp_dev;
struct device *dev;
struct group_device *gdev;
int ret, req_type;
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
......@@ -2959,23 +2965,23 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
if (req_type == IOMMU_DOMAIN_DMA_FQ &&
group->default_domain->type == IOMMU_DOMAIN_DMA) {
ret = iommu_dma_init_fq(group->default_domain);
if (!ret)
group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
mutex_unlock(&group->mutex);
if (ret)
goto out_unlock;
return ret ?: count;
group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
ret = count;
goto out_unlock;
}
/* Otherwise, ensure that device exists and no driver is bound. */
if (list_empty(&group->devices) || group->owner_cnt) {
mutex_unlock(&group->mutex);
return -EPERM;
ret = -EPERM;
goto out_unlock;
}
grp_dev = list_first_entry(&group->devices, struct group_device, list);
dev = grp_dev->dev;
ret = iommu_change_dev_def_domain(group, dev, req_type);
ret = iommu_setup_default_domain(group, req_type);
if (ret)
goto out_unlock;
/*
* Release the mutex here because ops->probe_finalize() call-back of
......@@ -2986,9 +2992,12 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
mutex_unlock(&group->mutex);
/* Make sure dma_ops is appropriately set */
if (!ret)
__iommu_group_dma_finalize(group);
for_each_group_device(group, gdev)
iommu_group_do_probe_finalize(gdev->dev);
return count;
out_unlock:
mutex_unlock(&group->mutex);
return ret ?: count;
}
......@@ -3182,16 +3191,13 @@ EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
/*
 * Drop DMA ownership of @group and re-attach its default domain.
 *
 * Nothing is done (beyond the warning) when the group is not currently owned
 * or still has entries in its pasid_array.
 *
 * The default domain is attached exactly once, through the no-fail helper.
 * A separate fallible __iommu_group_set_domain() call followed by a WARN()
 * is not needed in addition to it and would only attach the same domain
 * twice.
 * NOTE(review): presumably __iommu_group_set_domain_nofail() reports a
 * failing attach itself - confirm against its definition.
 */
static void __iommu_release_dma_ownership(struct iommu_group *group)
{
	if (WARN_ON(!group->owner_cnt || !group->owner ||
		    !xa_empty(&group->pasid_array)))
		return;

	group->owner_cnt = 0;
	group->owner = NULL;
	__iommu_group_set_domain_nofail(group, group->default_domain);
}
/**
......@@ -3253,7 +3259,7 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
struct group_device *device;
int ret = 0;
list_for_each_entry(device, &group->devices, list) {
for_each_group_device(group, device) {
ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
if (ret)
break;
......@@ -3268,7 +3274,7 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
struct group_device *device;
const struct iommu_ops *ops;
list_for_each_entry(device, &group->devices, list) {
for_each_group_device(group, device) {
ops = dev_iommu_ops(device->dev);
ops->remove_dev_pasid(device->dev, pasid);
}
......
......@@ -647,7 +647,13 @@ struct iova_rcache {
/*
 * Allocate an empty IOVA magazine.
 *
 * @flags: GFP flags forwarded to kmalloc().
 *
 * Returns the new magazine with ->size set to 0, or NULL when the allocation
 * fails.
 *
 * Only ->size is initialised; the remainder of the structure is left exactly
 * as kmalloc() returned it instead of being zeroed.
 * NOTE(review): assumes no user reads magazine contents at or beyond ->size -
 * confirm against the struct iova_magazine users.
 */
static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	struct iova_magazine *mag;

	mag = kmalloc(sizeof(*mag), flags);
	if (mag)
		mag->size = 0;

	return mag;
}
static void iova_magazine_free(struct iova_magazine *mag)
......
......@@ -788,6 +788,29 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
return 0;
}
/*
 * Detach an endpoint from the domain it is currently attached to, if any.
 *
 * One VIRTIO_IOMMU_T_DETACH request is sent synchronously for every ID listed
 * in the device's fwspec. A failing request triggers WARN_ON() but does not
 * stop the remaining IDs from being processed. Afterwards the domain's
 * endpoint count is decremented and the endpoint's domain pointer is cleared.
 */
static void viommu_detach_dev(struct viommu_endpoint *vdev)
{
	struct viommu_domain *vdomain = vdev->vdomain;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(vdev->dev);
	struct virtio_iommu_req_detach req;
	int idx;

	/* Endpoint is not attached to any domain: nothing to undo. */
	if (!vdomain)
		return;

	/* Fields common to every request; only the endpoint ID varies below. */
	req = (struct virtio_iommu_req_detach) {
		.head.type = VIRTIO_IOMMU_T_DETACH,
		.domain = cpu_to_le32(vdomain->id),
	};

	for (idx = 0; idx < fwspec->num_ids; idx++) {
		int err;

		req.endpoint = cpu_to_le32(fwspec->ids[idx]);
		err = viommu_send_req_sync(vdev->viommu, &req, sizeof(req));
		WARN_ON(err);
	}

	vdomain->nr_endpoints--;
	vdev->vdomain = NULL;
}
static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
......@@ -810,25 +833,26 @@ static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova,
if (ret)
return ret;
map = (struct virtio_iommu_req_map) {
.head.type = VIRTIO_IOMMU_T_MAP,
.domain = cpu_to_le32(vdomain->id),
.virt_start = cpu_to_le64(iova),
.phys_start = cpu_to_le64(paddr),
.virt_end = cpu_to_le64(end),
.flags = cpu_to_le32(flags),
};
if (!vdomain->nr_endpoints)
return 0;
if (vdomain->nr_endpoints) {
map = (struct virtio_iommu_req_map) {
.head.type = VIRTIO_IOMMU_T_MAP,
.domain = cpu_to_le32(vdomain->id),
.virt_start = cpu_to_le64(iova),
.phys_start = cpu_to_le64(paddr),
.virt_end = cpu_to_le64(end),
.flags = cpu_to_le32(flags),
};
ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
if (ret)
viommu_del_mappings(vdomain, iova, end);
else if (mapped)
ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
if (ret) {
viommu_del_mappings(vdomain, iova, end);
return ret;
}
}
if (mapped)
*mapped = size;
return ret;
return 0;
}
static size_t viommu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
......@@ -990,6 +1014,7 @@ static void viommu_release_device(struct device *dev)
{
/* Endpoint state for this device, fetched via dev_iommu_priv_get(). */
struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
/*
 * Detach first: viommu_detach_dev() dereferences vdev, so it has to run
 * while vdev is still allocated.
 */
viommu_detach_dev(vdev);
/* Hand back the reserved regions recorded in vdev before freeing it. */
iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
}
......
......@@ -65,6 +65,7 @@ struct iommu_domain_geometry {
#define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */
#define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ
/*
* These are the possible domain types
*
......@@ -127,6 +128,11 @@ enum iommu_cap {
* this device.
*/
IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
/*
* IOMMU driver does not issue TLB maintenance during .unmap, so can
* usefully support the non-strict DMA flush queue.
*/
IOMMU_CAP_DEFERRED_FLUSH,
};
/* These are the possible reserved region types */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment