Merge tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu updates from Joerg Roedel: "Core changes: - iova_magazine_alloc() optimization - Make flush-queue an IOMMU driver capability - Consolidate the error handling around device attachment AMD IOMMU changes: - AVIC Interrupt Remapping Improvements - Some minor fixes and cleanups Intel VT-d changes from Lu Baolu: - Small and misc cleanups ARM-SMMU changes from Will Deacon: - Device-tree binding updates: - Add missing clocks for SC8280XP and SA8775 Adreno SMMUs - Add two new Qualcomm SMMUs in SDX75 and SM6375 - Workarounds for Arm MMU-700 errata: - 1076982: Avoid use of SEV-based cmdq wakeup - 2812531: Terminate command batches with a CMD_SYNC - Enforce single-stage translation to avoid nesting-related errata - Set the correct level hint for range TLB invalidation on teardown .. and some other minor fixes and cleanups (including Freescale PAMU and virtio-iommu changes)" * tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (50 commits) iommu/vt-d: Remove commented-out code iommu/vt-d: Remove two WARN_ON in domain_context_mapping_one() iommu/vt-d: Handle the failure case of dmar_reenable_qi() iommu/vt-d: Remove unnecessary (void*) conversions iommu/amd: Remove extern from function prototypes iommu/amd: Use BIT/BIT_ULL macro to define bit fields iommu/amd: Fix DTE_IRQ_PHYS_ADDR_MASK macro iommu/amd: Fix compile error for unused function iommu/amd: Improving Interrupt Remapping Table Invalidation iommu/amd: Do not Invalidate IRT when IRTE caching is disabled iommu/amd: Introduce Disable IRTE Caching Support iommu/amd: Remove the unused struct amd_ir_data.ref iommu/amd: Switch amd_iommu_update_ga() to use modify_irte_ga() iommu/arm-smmu-v3: Set TTL invalidation hint better iommu/arm-smmu-v3: Document nesting-related errata iommu/arm-smmu-v3: Add explicit feature for nesting iommu/arm-smmu-v3: Document MMU-700 erratum 2812531 iommu/arm-smmu-v3: Work around MMU-600 erratum 1076982 dt-bindings: arm-smmu: Add SDX75 SMMU compatible 
dt-bindings: arm-smmu: Add SM6375 GPU SMMU ...

Merge tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel: "Core changes: - iova_magazine_alloc() optimization - Make flush-queue an IOMMU driver capability - Consolidate the error handling around device attachment AMD IOMMU changes: - AVIC Interrupt Remapping Improvements - Some minor fixes and cleanups Intel VT-d changes from Lu Baolu: - Small and misc cleanups ARM-SMMU changes from Will Deacon: - Device-tree binding updates: - Add missing clocks for SC8280XP and SA8775 Adreno SMMUs - Add two new Qualcomm SMMUs in SDX75 and SM6375 - Workarounds for Arm MMU-700 errata: - 1076982: Avoid use of SEV-based cmdq wakeup - 2812531: Terminate command batches with a CMD_SYNC - Enforce single-stage translation to avoid nesting-related errata - Set the correct level hint for range TLB invalidation on teardown .. and some other minor fixes and cleanups (including Freescale PAMU and virtio-iommu changes)" * tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (50 commits) iommu/vt-d: Remove commented-out code iommu/vt-d: Remove two WARN_ON in domain_context_mapping_one() iommu/vt-d: Handle the failure case of dmar_reenable_qi() iommu/vt-d: Remove unnecessary (void*) conversions iommu/amd: Remove extern from function prototypes iommu/amd: Use BIT/BIT_ULL macro to define bit fields iommu/amd: Fix DTE_IRQ_PHYS_ADDR_MASK macro iommu/amd: Fix compile error for unused function iommu/amd: Improving Interrupt Remapping Table Invalidation iommu/amd: Do not Invalidate IRT when IRTE caching is disabled iommu/amd: Introduce Disable IRTE Caching Support iommu/amd: Remove the unused struct amd_ir_data.ref iommu/amd: Switch amd_iommu_update_ga() to use modify_irte_ga() iommu/arm-smmu-v3: Set TTL invalidation hint better iommu/arm-smmu-v3: Document nesting-related errata iommu/arm-smmu-v3: Add explicit feature for nesting iommu/arm-smmu-v3: Document MMU-700 erratum 2812531 iommu/arm-smmu-v3: Work around MMU-600 erratum 1076982 dt-bindings: arm-smmu: Add SDX75 SMMU compatible 
dt-bindings: arm-smmu: Add SM6375 GPU SMMU ...
d35ac6ac · Linus Torvalds · 0b26eadb · a7a33407 · d35ac6ac · d35ac6ac
Commit d35ac6ac authored Jun 29, 2023 by Linus Torvalds
21 changed files
--- a/.clang-format
+++ b/.clang-format
@@ -254,6 +254,7 @@ ForEachMacros:
  - 'for_each_free_mem_range'
  - 'for_each_free_mem_range_reverse'
  - 'for_each_func_rsrc'
+  - 'for_each_group_device'
  - 'for_each_group_evsel'
  - 'for_each_group_member'
  - 'for_each_hstate'

--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -323,6 +323,7 @@
 				       option with care.
 			pgtbl_v1     - Use v1 page table for DMA-API (Default).
 			pgtbl_v2     - Use v2 page table for DMA-API.
+			irtcachedis  - Disable Interrupt Remapping Table (IRT) caching.

 	amd_iommu_dump=	[HW,X86-64]
 			Enable AMD IOMMU driver option to dump the ACPI table

--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -140,6 +140,10 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | MMU-500         | #841119,826419  | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | MMU-600         | #1076982,1209401| N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | MMU-700         | #2268618,2812531| N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
 | Broadcom       | Brahma-B53      | N/A             | ARM64_ERRATUM_845719        |
 +----------------+-----------------+-----------------+-----------------------------+

--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -29,6 +29,7 @@ properties:
              - qcom,msm8996-smmu-v2
              - qcom,msm8998-smmu-v2
              - qcom,sdm630-smmu-v2
+              - qcom,sm6375-smmu-v2
          - const: qcom,smmu-v2

      - description: Qcom SoCs implementing "qcom,smmu-500" and "arm,mmu-500"
@@ -45,6 +46,7 @@ properties:
              - qcom,sdm845-smmu-500
              - qcom,sdx55-smmu-500
              - qcom,sdx65-smmu-500
+              - qcom,sdx75-smmu-500
              - qcom,sm6115-smmu-500
              - qcom,sm6125-smmu-500
              - qcom,sm6350-smmu-500
@@ -79,7 +81,9 @@ properties:
      - description: Qcom Adreno GPUs implementing "qcom,smmu-500" and "arm,mmu-500"
        items:
          - enum:
+              - qcom,sa8775p-smmu-500
              - qcom,sc7280-smmu-500
+              - qcom,sc8280xp-smmu-500
              - qcom,sm6115-smmu-500
              - qcom,sm6125-smmu-500
              - qcom,sm8150-smmu-500
@@ -267,6 +271,7 @@ allOf:
            enum:
              - qcom,msm8998-smmu-v2
              - qcom,sdm630-smmu-v2
+              - qcom,sm6375-smmu-v2
    then:
      anyOf:
        - properties:
@@ -331,7 +336,10 @@ allOf:
      properties:
        compatible:
          contains:
-            const: qcom,sc7280-smmu-500
+            enum:
+              - qcom,sa8775p-smmu-500
+              - qcom,sc7280-smmu-500
+              - qcom,sc8280xp-smmu-500
    then:
      properties:
        clock-names:
@@ -413,10 +421,8 @@ allOf:
              - nvidia,smmu-500
              - qcom,qcm2290-smmu-500
              - qcom,qdu1000-smmu-500
-              - qcom,sa8775p-smmu-500
              - qcom,sc7180-smmu-500
              - qcom,sc8180x-smmu-500
-              - qcom,sc8280xp-smmu-500
              - qcom,sdm670-smmu-500
              - qcom,sdm845-smmu-500
              - qcom,sdx55-smmu-500

--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -1353,6 +1353,7 @@ static struct platform_driver fsl_pci_driver = {
 		.of_match_table = pci_ids,
 	},
 	.probe = fsl_pci_probe,
+	.driver_managed_dma = true,
 };

 static int __init fsl_pci_init(void)

--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -11,12 +11,15 @@

 #include "amd_iommu_types.h"

-extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
-extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
-extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
-extern void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
-extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
+irqreturn_t amd_iommu_int_thread(int irq, void *data);
+irqreturn_t amd_iommu_int_handler(int irq, void *data);
+void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
+void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
+void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
+int amd_iommu_init_devices(void);
+void amd_iommu_uninit_devices(void);
+void amd_iommu_init_notifier(void);
+void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);

 #ifdef CONFIG_AMD_IOMMU_DEBUGFS
 void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
@@ -25,11 +28,11 @@ static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {}
 #endif

 /* Needed for interrupt remapping */
-extern int amd_iommu_prepare(void);
-extern int amd_iommu_enable(void);
-extern void amd_iommu_disable(void);
-extern int amd_iommu_reenable(int);
-extern int amd_iommu_enable_faulting(void);
+int amd_iommu_prepare(void);
+int amd_iommu_enable(void);
+void amd_iommu_disable(void);
+int amd_iommu_reenable(int mode);
+int amd_iommu_enable_faulting(void);
 extern int amd_iommu_guest_ir;
 extern enum io_pgtable_fmt amd_iommu_pgtable;
 extern int amd_iommu_gpt_level;
@@ -37,33 +40,32 @@ extern int amd_iommu_gpt_level;
 /* IOMMUv2 specific functions */
 struct iommu_domain;

-extern bool amd_iommu_v2_supported(void);
-extern struct amd_iommu *get_amd_iommu(unsigned int idx);
-extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
-extern bool amd_iommu_pc_supported(void);
-extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
-extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
-				u8 fxn, u64 *value);
-extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
-				u8 fxn, u64 *value);
-
-extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
-extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
-extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
-extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
-extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
-				u64 address);
-extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
-extern void amd_iommu_domain_update(struct protection_domain *domain);
-extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
-extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
-extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
-extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
-				     unsigned long cr3);
-extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
+bool amd_iommu_v2_supported(void);
+struct amd_iommu *get_amd_iommu(unsigned int idx);
+u8 amd_iommu_pc_get_max_banks(unsigned int idx);
+bool amd_iommu_pc_supported(void);
+u8 amd_iommu_pc_get_max_counters(unsigned int idx);
+int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+			 u8 fxn, u64 *value);
+int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+			 u8 fxn, u64 *value);
+
+int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
+int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
+void amd_iommu_domain_direct_map(struct iommu_domain *dom);
+int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address);
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
+void amd_iommu_domain_update(struct protection_domain *domain);
+void amd_iommu_domain_flush_complete(struct protection_domain *domain);
+void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
+int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
+int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
+			      unsigned long cr3);
+int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);

 #ifdef CONFIG_IRQ_REMAP
-extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
+int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
 #else
 static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 {
@@ -75,8 +77,8 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 #define PPR_INVALID			0x1
 #define PPR_FAILURE			0xf

-extern int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
-				  int status, int tag);
+int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
+			   int status, int tag);

 static inline bool is_rd890_iommu(struct pci_dev *pdev)
 {
@@ -129,10 +131,9 @@ static inline void *alloc_pgtable_page(int nid, gfp_t gfp)
 	return page ? page_address(page) : NULL;
 }

-extern bool translation_pre_enabled(struct amd_iommu *iommu);
-extern bool amd_iommu_is_attach_deferred(struct device *dev);
-extern int __init add_special_device(u8 type, u8 id, u32 *devid,
-				     bool cmd_line);
+bool translation_pre_enabled(struct amd_iommu *iommu);
+bool amd_iommu_is_attach_deferred(struct device *dev);
+int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line);

 #ifdef CONFIG_DMI
 void amd_iommu_apply_ivrs_quirks(void);
@@ -140,9 +141,9 @@ void amd_iommu_apply_ivrs_quirks(void);
 static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif

-extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
-					 u64 *root, int mode);
-extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
+void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+				  u64 *root, int mode);
+struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);

 extern u64 amd_iommu_efr;
 extern u64 amd_iommu_efr2;

--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -84,21 +84,21 @@


 /* Extended Feature Bits */
-#define FEATURE_PREFETCH	(1ULL<<0)
-#define FEATURE_PPR		(1ULL<<1)
-#define FEATURE_X2APIC		(1ULL<<2)
-#define FEATURE_NX		(1ULL<<3)
-#define FEATURE_GT		(1ULL<<4)
-#define FEATURE_IA		(1ULL<<6)
-#define FEATURE_GA		(1ULL<<7)
-#define FEATURE_HE		(1ULL<<8)
-#define FEATURE_PC		(1ULL<<9)
+#define FEATURE_PREFETCH	BIT_ULL(0)
+#define FEATURE_PPR		BIT_ULL(1)
+#define FEATURE_X2APIC		BIT_ULL(2)
+#define FEATURE_NX		BIT_ULL(3)
+#define FEATURE_GT		BIT_ULL(4)
+#define FEATURE_IA		BIT_ULL(6)
+#define FEATURE_GA		BIT_ULL(7)
+#define FEATURE_HE		BIT_ULL(8)
+#define FEATURE_PC		BIT_ULL(9)
 #define FEATURE_GATS_SHIFT	(12)
 #define FEATURE_GATS_MASK	(3ULL)
-#define FEATURE_GAM_VAPIC	(1ULL<<21)
-#define FEATURE_GIOSUP		(1ULL<<48)
-#define FEATURE_EPHSUP		(1ULL<<50)
-#define FEATURE_SNP		(1ULL<<63)
+#define FEATURE_GAM_VAPIC	BIT_ULL(21)
+#define FEATURE_GIOSUP		BIT_ULL(48)
+#define FEATURE_EPHSUP		BIT_ULL(50)
+#define FEATURE_SNP		BIT_ULL(63)

 #define FEATURE_PASID_SHIFT	32
 #define FEATURE_PASID_MASK	(0x1fULL << FEATURE_PASID_SHIFT)
@@ -120,13 +120,13 @@
 #define PASID_MASK		0x0000ffff

 /* MMIO status bits */
-#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK	(1 << 0)
-#define MMIO_STATUS_EVT_INT_MASK	(1 << 1)
-#define MMIO_STATUS_COM_WAIT_INT_MASK	(1 << 2)
-#define MMIO_STATUS_PPR_INT_MASK	(1 << 6)
-#define MMIO_STATUS_GALOG_RUN_MASK	(1 << 8)
-#define MMIO_STATUS_GALOG_OVERFLOW_MASK	(1 << 9)
-#define MMIO_STATUS_GALOG_INT_MASK	(1 << 10)
+#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK	BIT(0)
+#define MMIO_STATUS_EVT_INT_MASK		BIT(1)
+#define MMIO_STATUS_COM_WAIT_INT_MASK		BIT(2)
+#define MMIO_STATUS_PPR_INT_MASK		BIT(6)
+#define MMIO_STATUS_GALOG_RUN_MASK		BIT(8)
+#define MMIO_STATUS_GALOG_OVERFLOW_MASK		BIT(9)
+#define MMIO_STATUS_GALOG_INT_MASK		BIT(10)

 /* event logging constants */
 #define EVENT_ENTRY_SIZE	0x10
@@ -174,6 +174,7 @@
 #define CONTROL_GAINT_EN	29
 #define CONTROL_XT_EN		50
 #define CONTROL_INTCAPXT_EN	51
+#define CONTROL_IRTCACHEDIS	59
 #define CONTROL_SNPAVIC_EN	61

 #define CTRL_INV_TO_MASK	(7 << CONTROL_INV_TIMEOUT)
@@ -283,7 +284,7 @@
 #define AMD_IOMMU_PGSIZES_V2	(PAGE_SIZE | (1ULL << 21) | (1ULL << 30))

 /* Bit value definition for dte irq remapping fields*/
-#define DTE_IRQ_PHYS_ADDR_MASK	(((1ULL << 45)-1) << 6)
+#define DTE_IRQ_PHYS_ADDR_MASK		GENMASK_ULL(51, 6)
 #define DTE_IRQ_REMAP_INTCTL_MASK	(0x3ULL << 60)
 #define DTE_IRQ_REMAP_INTCTL    (2ULL << 60)
 #define DTE_IRQ_REMAP_ENABLE    1ULL
@@ -369,23 +370,23 @@
 /*
 * Bit value definition for I/O PTE fields
 */
-#define IOMMU_PTE_PR (1ULL << 0)
-#define IOMMU_PTE_U  (1ULL << 59)
-#define IOMMU_PTE_FC (1ULL << 60)
-#define IOMMU_PTE_IR (1ULL << 61)
-#define IOMMU_PTE_IW (1ULL << 62)
+#define IOMMU_PTE_PR	BIT_ULL(0)
+#define IOMMU_PTE_U	BIT_ULL(59)
+#define IOMMU_PTE_FC	BIT_ULL(60)
+#define IOMMU_PTE_IR	BIT_ULL(61)
+#define IOMMU_PTE_IW	BIT_ULL(62)

 /*
 * Bit value definition for DTE fields
 */
-#define DTE_FLAG_V  (1ULL << 0)
-#define DTE_FLAG_TV (1ULL << 1)
-#define DTE_FLAG_IR (1ULL << 61)
-#define DTE_FLAG_IW (1ULL << 62)
-
-#define DTE_FLAG_IOTLB	(1ULL << 32)
-#define DTE_FLAG_GIOV	(1ULL << 54)
-#define DTE_FLAG_GV	(1ULL << 55)
+#define DTE_FLAG_V	BIT_ULL(0)
+#define DTE_FLAG_TV	BIT_ULL(1)
+#define DTE_FLAG_IR	BIT_ULL(61)
+#define DTE_FLAG_IW	BIT_ULL(62)
+
+#define DTE_FLAG_IOTLB	BIT_ULL(32)
+#define DTE_FLAG_GIOV	BIT_ULL(54)
+#define DTE_FLAG_GV	BIT_ULL(55)
 #define DTE_FLAG_MASK	(0x3ffULL << 32)
 #define DTE_GLX_SHIFT	(56)
 #define DTE_GLX_MASK	(3)
@@ -439,13 +440,13 @@
 #define MAX_DOMAIN_ID 65536

 /* Protection domain flags */
-#define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
-#define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
+#define PD_DMA_OPS_MASK		BIT(0) /* domain used for dma_ops */
+#define PD_DEFAULT_MASK		BIT(1) /* domain is a default dma_ops
 					      domain for an IOMMU */
-#define PD_PASSTHROUGH_MASK	(1UL << 2) /* domain has no page
+#define PD_PASSTHROUGH_MASK	BIT(2) /* domain has no page
 					      translation */
-#define PD_IOMMUV2_MASK		(1UL << 3) /* domain has gcr3 table */
-#define PD_GIOV_MASK		(1UL << 4) /* domain enable GIOV support */
+#define PD_IOMMUV2_MASK		BIT(3) /* domain has gcr3 table */
+#define PD_GIOV_MASK		BIT(4) /* domain enable GIOV support */

 extern bool amd_iommu_dump;
 #define DUMP_printk(format, arg...)				\
@@ -716,6 +717,9 @@ struct amd_iommu {
 	/* if one, we need to send a completion wait command */
 	bool need_sync;

+	/* True if IRTE caching is disabled on this IOMMU */
+	bool irtcachedis_enabled;
+
 	/* Handle for IOMMU core code */
 	struct iommu_device iommu;

@@ -748,7 +752,7 @@ struct amd_iommu {

 	u32 flags;
 	volatile u64 *cmd_sem;
-	u64 cmd_sem_val;
+	atomic64_t cmd_sem_val;

 #ifdef CONFIG_AMD_IOMMU_DEBUGFS
 	/* DebugFS Info */
@@ -882,7 +886,7 @@ extern int amd_iommu_max_glx_val;
 * This function flushes all internal caches of
 * the IOMMU used by this driver.
 */
-extern void iommu_flush_all_caches(struct amd_iommu *iommu);
+void iommu_flush_all_caches(struct amd_iommu *iommu);

 static inline int get_ioapic_devid(int id)
 {
@@ -1006,7 +1010,6 @@ struct amd_ir_data {
 	struct irq_2_irte irq_2_irte;
 	struct msi_msg msi_entry;
 	void *entry;    /* Pointer to union irte or struct irte_ga */
-	void *ref;      /* Pointer to the actual irte */

 	/**
 	 * Store information for activate/de-activate

--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -162,6 +162,7 @@ static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
 static bool amd_iommu_detected;
 static bool amd_iommu_disabled __initdata;
 static bool amd_iommu_force_enable __initdata;
+static bool amd_iommu_irtcachedis;
 static int amd_iommu_target_ivhd_type;

 /* Global EFR and EFR2 registers */
@@ -484,6 +485,9 @@ static void iommu_disable(struct amd_iommu *iommu)

 	/* Disable IOMMU hardware itself */
 	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
+
+	/* Clear IRTE cache disabling bit */
+	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
 }

 /*
@@ -1753,7 +1757,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
 	iommu->pci_seg = pci_seg;

 	raw_spin_lock_init(&iommu->lock);
-	iommu->cmd_sem_val = 0;
+	atomic64_set(&iommu->cmd_sem_val, 0);

 	/* Add IOMMU to internal data structures */
 	list_add_tail(&iommu->list, &amd_iommu_list);
@@ -2710,6 +2714,33 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
 #endif
 }

+static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
+{
+	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
+}
+
+static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
+{
+	u64 ctrl;
+
+	if (!amd_iommu_irtcachedis)
+		return;
+
+	/*
+	 * Note:
+	 * The support for IRTCacheDis feature is determined by
+	 * checking if the bit is writable.
+	 */
+	iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
+	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
+	ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
+	if (ctrl)
+		iommu->irtcachedis_enabled = true;
+	pr_info("iommu%d (%#06x) : IRT cache is %s\n",
+		iommu->index, iommu->devid,
+		iommu->irtcachedis_enabled ? "disabled" : "enabled");
+}
+
 static void early_enable_iommu(struct amd_iommu *iommu)
 {
 	iommu_disable(iommu);
@@ -2720,6 +2751,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
 	iommu_set_exclusion_range(iommu);
 	iommu_enable_ga(iommu);
 	iommu_enable_xt(iommu);
+	iommu_enable_irtcachedis(iommu);
 	iommu_enable(iommu);
 	iommu_flush_all_caches(iommu);
 }
@@ -2770,10 +2802,12 @@ static void early_enable_iommus(void)
 		for_each_iommu(iommu) {
 			iommu_disable_command_buffer(iommu);
 			iommu_disable_event_buffer(iommu);
+			iommu_disable_irtcachedis(iommu);
 			iommu_enable_command_buffer(iommu);
 			iommu_enable_event_buffer(iommu);
 			iommu_enable_ga(iommu);
 			iommu_enable_xt(iommu);
+			iommu_enable_irtcachedis(iommu);
 			iommu_set_device_table(iommu);
 			iommu_flush_all_caches(iommu);
 		}
@@ -3426,6 +3460,8 @@ static int __init parse_amd_iommu_options(char *str)
 			amd_iommu_pgtable = AMD_IOMMU_V1;
 		} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
 			amd_iommu_pgtable = AMD_IOMMU_V2;
+		} else if (strncmp(str, "irtcachedis", 11) == 0) {
+			amd_iommu_irtcachedis = true;
 		} else {
 			pr_notice("Unknown option - '%s'\n", str);
 		}

--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -310,8 +310,8 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
 			return NULL;

 		/* Large PTE */
-		if (PM_PTE_LEVEL(*pte) == 7 ||
-		    PM_PTE_LEVEL(*pte) == 0)
+		if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL ||
+		    PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE)
 			break;

 		/* No level skipping support yet */

--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -2,7 +2,7 @@
 /*
 * CPU-agnostic AMD IO page table v2 allocator.
 *
- * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ * Copyright (C) 2022, 2023 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 * Author: Vasant Hegde <vasant.hegde@amd.com>
 */

--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1182,11 +1182,11 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	if (!iommu->need_sync)
 		return 0;

-	raw_spin_lock_irqsave(&iommu->lock, flags);
-
-	data = ++iommu->cmd_sem_val;
+	data = atomic64_add_return(1, &iommu->cmd_sem_val);
 	build_completion_wait(&cmd, iommu, data);

+	raw_spin_lock_irqsave(&iommu->lock, flags);
+
 	ret = __iommu_queue_command_sync(iommu, &cmd, false);
 	if (ret)
 		goto out_unlock;
@@ -1273,6 +1273,9 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
 	u32 devid;
 	u16 last_bdf = iommu->pci_seg->last_bdf;

+	if (iommu->irtcachedis_enabled)
+		return;
+
 	for (devid = 0; devid <= last_bdf; devid++)
 		iommu_flush_irt(iommu, devid);

@@ -2313,6 +2316,8 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
 		return amdr_ivrs_remap_support;
 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
 		return true;
+	case IOMMU_CAP_DEFERRED_FLUSH:
+		return true;
 	default:
 		break;
 	}
@@ -2822,6 +2827,32 @@ EXPORT_SYMBOL(amd_iommu_device_info);
 static struct irq_chip amd_ir_chip;
 static DEFINE_SPINLOCK(iommu_table_lock);

+static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
+{
+	int ret;
+	u64 data;
+	unsigned long flags;
+	struct iommu_cmd cmd, cmd2;
+
+	if (iommu->irtcachedis_enabled)
+		return;
+
+	build_inv_irt(&cmd, devid);
+	data = atomic64_add_return(1, &iommu->cmd_sem_val);
+	build_completion_wait(&cmd2, iommu, data);
+
+	raw_spin_lock_irqsave(&iommu->lock, flags);
+	ret = __iommu_queue_command_sync(iommu, &cmd, true);
+	if (ret)
+		goto out;
+	ret = __iommu_queue_command_sync(iommu, &cmd2, false);
+	if (ret)
+		goto out;
+	wait_on_sem(iommu, data);
+out:
+	raw_spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
 static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
 			      struct irq_remap_table *table)
 {
@@ -3021,7 +3052,7 @@ static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count,
 }

 static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
-			  struct irte_ga *irte, struct amd_ir_data *data)
+			  struct irte_ga *irte)
 {
 	struct irq_remap_table *table;
 	struct irte_ga *entry;
@@ -3046,13 +3077,9 @@ static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
 	old = entry->irte;
 	WARN_ON(!try_cmpxchg128(&entry->irte, &old, irte->irte));

-	if (data)
-		data->ref = entry;
-
 	raw_spin_unlock_irqrestore(&table->lock, flags);

-	iommu_flush_irt(iommu, devid);
-	iommu_completion_wait(iommu);
+	iommu_flush_irt_and_complete(iommu, devid);

 	return 0;
 }
@@ -3071,8 +3098,7 @@ static int modify_irte(struct amd_iommu *iommu,
 	table->table[index] = irte->val;
 	raw_spin_unlock_irqrestore(&table->lock, flags);

-	iommu_flush_irt(iommu, devid);
-	iommu_completion_wait(iommu);
+	iommu_flush_irt_and_complete(iommu, devid);

 	return 0;
 }
@@ -3090,8 +3116,7 @@ static void free_irte(struct amd_iommu *iommu, u16 devid, int index)
 	iommu->irte_ops->clear_allocated(table, index);
 	raw_spin_unlock_irqrestore(&table->lock, flags);

-	iommu_flush_irt(iommu, devid);
-	iommu_completion_wait(iommu);
+	iommu_flush_irt_and_complete(iommu, devid);
 }

 static void irte_prepare(void *entry,
@@ -3137,7 +3162,7 @@ static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u1
 	struct irte_ga *irte = (struct irte_ga *) entry;

 	irte->lo.fields_remap.valid = 1;
-	modify_irte_ga(iommu, devid, index, irte, NULL);
+	modify_irte_ga(iommu, devid, index, irte);
 }

 static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
@@ -3153,7 +3178,7 @@ static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid,
 	struct irte_ga *irte = (struct irte_ga *) entry;

 	irte->lo.fields_remap.valid = 0;
-	modify_irte_ga(iommu, devid, index, irte, NULL);
+	modify_irte_ga(iommu, devid, index, irte);
 }

 static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index,
@@ -3177,7 +3202,7 @@ static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid
 					APICID_TO_IRTE_DEST_LO(dest_apicid);
 		irte->hi.fields.destination =
 					APICID_TO_IRTE_DEST_HI(dest_apicid);
-		modify_irte_ga(iommu, devid, index, irte, NULL);
+		modify_irte_ga(iommu, devid, index, irte);
 	}
 }

@@ -3527,7 +3552,7 @@ int amd_iommu_activate_guest_mode(void *data)
 	entry->lo.fields_vapic.ga_tag      = ir_data->ga_tag;

 	return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
-			      ir_data->irq_2_irte.index, entry, ir_data);
+			      ir_data->irq_2_irte.index, entry);
 }
 EXPORT_SYMBOL(amd_iommu_activate_guest_mode);

@@ -3557,7 +3582,7 @@ int amd_iommu_deactivate_guest_mode(void *data)
 				APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);

 	return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
-			      ir_data->irq_2_irte.index, entry, ir_data);
+			      ir_data->irq_2_irte.index, entry);
 }
 EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);

@@ -3719,44 +3744,26 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)

 int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 {
-	unsigned long flags;
-	struct amd_iommu *iommu;
-	struct irq_remap_table *table;
 	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
-	int devid = ir_data->irq_2_irte.devid;
 	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
-	struct irte_ga *ref = (struct irte_ga *) ir_data->ref;

 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
-	    !ref || !entry || !entry->lo.fields_vapic.guest_mode)
+	    !entry || !entry->lo.fields_vapic.guest_mode)
 		return 0;

-	iommu = ir_data->iommu;
-	if (!iommu)
+	if (!ir_data->iommu)
 		return -ENODEV;

-	table = get_irq_table(iommu, devid);
-	if (!table)
-		return -ENODEV;
-
-	raw_spin_lock_irqsave(&table->lock, flags);
-
-	if (ref->lo.fields_vapic.guest_mode) {
-		if (cpu >= 0) {
-			ref->lo.fields_vapic.destination =
-						APICID_TO_IRTE_DEST_LO(cpu);
-			ref->hi.fields.destination =
-						APICID_TO_IRTE_DEST_HI(cpu);
-		}
-		ref->lo.fields_vapic.is_run = is_run;
-		barrier();
+	if (cpu >= 0) {
+		entry->lo.fields_vapic.destination =
+					APICID_TO_IRTE_DEST_LO(cpu);
+		entry->hi.fields.destination =
+					APICID_TO_IRTE_DEST_HI(cpu);
 	}
+	entry->lo.fields_vapic.is_run = is_run;

-	raw_spin_unlock_irqrestore(&table->lock, flags);
-
-	iommu_flush_irt(iommu, devid);
-	iommu_completion_wait(iommu);
-	return 0;
+	return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
+			      ir_data->irq_2_irte.index, entry);
 }
 EXPORT_SYMBOL(amd_iommu_update_ga);
 #endif
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -894,6 +894,12 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
 {
 	int index;

+	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
+	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
+		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
+		cmds->num = 0;
+	}
+
 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
 		cmds->num = 0;
@@ -1892,8 +1898,13 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
 		cmd->tlbi.tg = (tg - 10) / 2;

-		/* Determine what level the granule is at */
-		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+		/*
+		 * Determine what level the granule is at. For non-leaf, io-pgtable
+		 * assumes .tlb_flush_walk can invalidate multiple levels at once,
+		 * so ignore the nominal last-level granule and leave TTL=0.
+		 */
+		if (cmd->tlbi.leaf)
+			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));

 		num_pages = size >> tg;
 	}
@@ -2008,6 +2019,7 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
 		/* Assume that a coherent TCU implies coherent TBUs */
 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
 	case IOMMU_CAP_NOEXEC:
+	case IOMMU_CAP_DEFERRED_FLUSH:
 		return true;
 	default:
 		return false;
@@ -2023,7 +2035,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)

 	if (type != IOMMU_DOMAIN_UNMANAGED &&
 	    type != IOMMU_DOMAIN_DMA &&
-	    type != IOMMU_DOMAIN_DMA_FQ &&
 	    type != IOMMU_DOMAIN_IDENTITY)
 		return NULL;

@@ -3429,6 +3440,44 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 	return 0;
 }

+#define IIDR_IMPLEMENTER_ARM		0x43b
+#define IIDR_PRODUCTID_ARM_MMU_600	0x483
+#define IIDR_PRODUCTID_ARM_MMU_700	0x487
+
+static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
+{
+	u32 reg;
+	unsigned int implementer, productid, variant, revision;
+
+	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
+	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
+	productid = FIELD_GET(IIDR_PRODUCTID, reg);
+	variant = FIELD_GET(IIDR_VARIANT, reg);
+	revision = FIELD_GET(IIDR_REVISION, reg);
+
+	switch (implementer) {
+	case IIDR_IMPLEMENTER_ARM:
+		switch (productid) {
+		case IIDR_PRODUCTID_ARM_MMU_600:
+			/* Arm erratum 1076982 */
+			if (variant == 0 && revision <= 2)
+				smmu->features &= ~ARM_SMMU_FEAT_SEV;
+			/* Arm erratum 1209401 */
+			if (variant < 2)
+				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+			break;
+		case IIDR_PRODUCTID_ARM_MMU_700:
+			/* Arm erratum 2812531 */
+			smmu->features &= ~ARM_SMMU_FEAT_BTM;
+			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
+			/* Arm errata 2268618, 2812531 */
+			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+			break;
+		}
+		break;
+	}
+}
+
 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 {
 	u32 reg;
@@ -3635,6 +3684,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)

 	smmu->ias = max(smmu->ias, smmu->oas);

+	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
+	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
+		smmu->features |= ARM_SMMU_FEAT_NESTING;
+
+	arm_smmu_device_iidr_probe(smmu);
+
 	if (arm_smmu_sva_supported(smmu))
 		smmu->features |= ARM_SMMU_FEAT_SVA;


--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -69,6 +69,12 @@
 #define IDR5_VAX			GENMASK(11, 10)
 #define IDR5_VAX_52_BIT			1

+#define ARM_SMMU_IIDR			0x18
+#define IIDR_PRODUCTID			GENMASK(31, 20)
+#define IIDR_VARIANT			GENMASK(19, 16)
+#define IIDR_REVISION			GENMASK(15, 12)
+#define IIDR_IMPLEMENTER		GENMASK(11, 0)
+
 #define ARM_SMMU_CR0			0x20
 #define CR0_ATSCHK			(1 << 4)
 #define CR0_CMDQEN			(1 << 3)
@@ -639,11 +645,13 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_BTM		(1 << 16)
 #define ARM_SMMU_FEAT_SVA		(1 << 17)
 #define ARM_SMMU_FEAT_E2H		(1 << 18)
+#define ARM_SMMU_FEAT_NESTING		(1 << 19)
 	u32				features;

 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
 #define ARM_SMMU_OPT_MSIPOLL		(1 << 2)
+#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC	(1 << 3)
 	u32				options;

 	struct arm_smmu_cmdq		cmdq;

--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -856,8 +856,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	struct arm_smmu_domain *smmu_domain;

 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
-		if (using_legacy_binding ||
-		    (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
+		if (using_legacy_binding || type != IOMMU_DOMAIN_DMA)
 			return NULL;
 	}
 	/*
@@ -1325,6 +1324,7 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
 		return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK ||
 			device_get_dma_attr(dev) == DEV_DMA_COHERENT;
 	case IOMMU_CAP_NOEXEC:
+	case IOMMU_CAP_DEFERRED_FLUSH:
 		return true;
 	default:
 		return false;

--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -615,7 +615,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 		goto done_unlock;

 	/* If the FQ fails we can simply fall back to strict mode */
-	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
+	if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
+	    (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
 		domain->type = IOMMU_DOMAIN_DMA;

 	ret = iova_reserve_iommu_regions(dev, domain);

--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -334,17 +334,6 @@ int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu)
 	return ret;
 }

-static struct iommu_group *get_device_iommu_group(struct device *dev)
-{
-	struct iommu_group *group;
-
-	group = iommu_group_get(dev);
-	if (!group)
-		group = iommu_group_alloc();
-
-	return group;
-}
-
 static  bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl)
 {
 	u32 version;
@@ -356,94 +345,52 @@ static  bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl)
 	return version >= 0x204;
 }

-/* Get iommu group information from peer devices or devices on the parent bus */
-static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev)
+static struct iommu_group *fsl_pamu_device_group(struct device *dev)
 {
-	struct pci_dev *tmp;
 	struct iommu_group *group;
-	struct pci_bus *bus = pdev->bus;
+	struct pci_dev *pdev;

 	/*
-	 * Traverese the pci bus device list to get
-	 * the shared iommu group.
+	 * For platform devices we allocate a separate group for each of the
+	 * devices.
 	 */
-	while (bus) {
-		list_for_each_entry(tmp, &bus->devices, bus_list) {
-			if (tmp == pdev)
-				continue;
-			group = iommu_group_get(&tmp->dev);
-			if (group)
-				return group;
-		}
+	if (!dev_is_pci(dev))
+		return generic_device_group(dev);

-		bus = bus->parent;
-	}
-
-	return NULL;
-}
-
-static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
-{
-	struct pci_controller *pci_ctl;
-	bool pci_endpt_partitioning;
-	struct iommu_group *group = NULL;
-
-	pci_ctl = pci_bus_to_host(pdev->bus);
-	pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl);
-	/* We can partition PCIe devices so assign device group to the device */
-	if (pci_endpt_partitioning) {
-		group = pci_device_group(&pdev->dev);
-
-		/*
-		 * PCIe controller is not a paritionable entity
-		 * free the controller device iommu_group.
-		 */
-		if (pci_ctl->parent->iommu_group)
-			iommu_group_remove_device(pci_ctl->parent);
-	} else {
-		/*
-		 * All devices connected to the controller will share the
-		 * PCI controllers device group. If this is the first
-		 * device to be probed for the pci controller, copy the
-		 * device group information from the PCI controller device
-		 * node and remove the PCI controller iommu group.
-		 * For subsequent devices, the iommu group information can
-		 * be obtained from sibling devices (i.e. from the bus_devices
-		 * link list).
-		 */
-		if (pci_ctl->parent->iommu_group) {
-			group = get_device_iommu_group(pci_ctl->parent);
-			iommu_group_remove_device(pci_ctl->parent);
-		} else {
-			group = get_shared_pci_device_group(pdev);
-		}
-	}
-
-	if (!group)
-		group = ERR_PTR(-ENODEV);
+	/*
+	 * We can partition PCIe devices so assign device group to the device
+	 */
+	pdev = to_pci_dev(dev);
+	if (check_pci_ctl_endpt_part(pci_bus_to_host(pdev->bus)))
+		return pci_device_group(&pdev->dev);

+	/*
+	 * All devices connected to the controller will share the same device
+	 * group.
+	 *
+	 * Due to ordering between fsl_pamu_init() and fsl_pci_init() it is
+	 * guaranteed that the pci_ctl->parent platform_device will have the
+	 * iommu driver bound and will already have a group set. So we just
+	 * re-use this group as the group for every device in the hose.
+	 */
+	group = iommu_group_get(pci_bus_to_host(pdev->bus)->parent);
+	if (WARN_ON(!group))
+		return ERR_PTR(-EINVAL);
 	return group;
 }

-static struct iommu_group *fsl_pamu_device_group(struct device *dev)
+static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
 {
-	struct iommu_group *group = ERR_PTR(-ENODEV);
 	int len;

 	/*
-	 * For platform devices we allocate a separate group for
-	 * each of the devices.
+	 * U-Boot must fill in the fsl,liodn property for platform devices to be
+	 * supported by the iommu.
 	 */
-	if (dev_is_pci(dev))
-		group = get_pci_device_group(to_pci_dev(dev));
-	else if (of_get_property(dev->of_node, "fsl,liodn", &len))
-		group = get_device_iommu_group(dev);
-
-	return group;
-}
+	if (!dev_is_pci(dev) &&
+	    !of_get_property(dev->of_node, "fsl,liodn", &len))
+		return ERR_PTR(-ENODEV);

-static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
-{
 	return &pamu_iommu;
 }


--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1185,7 +1185,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
 	struct root_entry *root;

-	root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC);
+	root = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
 	if (!root) {
 		pr_err("Allocating root entry for %s failed\n",
 			iommu->name);
@@ -1312,15 +1312,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
 			iommu->name, type);
 		return;
 	}
-	/* Note: set drain read/write */
-#if 0
-	/*
-	 * This is probably to be super secure.. Looks like we can
-	 * ignore it without any impact.
-	 */
-	if (cap_read_drain(iommu->cap))
-		val |= DMA_TLB_READ_DRAIN;
-#endif
+
 	if (cap_write_drain(iommu->cap))
 		val |= DMA_TLB_WRITE_DRAIN;

@@ -1897,8 +1889,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	struct context_entry *context;
 	int ret;

-	WARN_ON(did == 0);
-
 	if (hw_pass_through && domain_type_is_si(domain))
 		translation = CONTEXT_TT_PASS_THROUGH;

@@ -1944,8 +1934,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	if (sm_supported(iommu)) {
 		unsigned long pds;

-		WARN_ON(!table);
-
 		/* Setup the PASID DIR pointer: */
 		pds = context_get_sm_pds(table);
 		context->lo = (u64)virt_to_phys(table->table) |
@@ -2967,10 +2955,15 @@ static int init_iommu_hw(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu = NULL;
+	int ret;

-	for_each_active_iommu(iommu, drhd)
-		if (iommu->qi)
-			dmar_reenable_qi(iommu);
+	for_each_active_iommu(iommu, drhd) {
+		if (iommu->qi) {
+			ret = dmar_reenable_qi(iommu);
+			if (ret)
+				return ret;
+		}
+	}

 	for_each_iommu(iommu, drhd) {
 		if (drhd->ignored) {
@@ -4064,7 +4057,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 	case IOMMU_DOMAIN_BLOCKED:
 		return &blocking_domain;
 	case IOMMU_DOMAIN_DMA:
-	case IOMMU_DOMAIN_DMA_FQ:
 	case IOMMU_DOMAIN_UNMANAGED:
 		dmar_domain = alloc_domain(type);
 		if (!dmar_domain) {
@@ -4369,6 +4361,7 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)

 	switch (cap) {
 	case IOMMU_CAP_CACHE_COHERENCY:
+	case IOMMU_CAP_DEFERRED_FLUSH:
 		return true;
 	case IOMMU_CAP_PRE_BOOT_PROTECTION:
 		return dmar_platform_optin();

--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -68,6 +68,10 @@ struct group_device {
 	char *name;
 };

+/* Iterate over each struct group_device in a struct iommu_group */
+#define for_each_group_device(group, pos) \
+	list_for_each_entry(pos, &(group)->devices, list)
+
 struct iommu_group_attribute {
 	struct attribute attr;
 	ssize_t (*show)(struct iommu_group *group, char *buf);
@@ -89,17 +93,39 @@ static const char * const iommu_group_resv_type_string[] = {
 static int iommu_bus_notifier(struct notifier_block *nb,
 			      unsigned long action, void *data);
 static void iommu_release_device(struct device *dev);
-static int iommu_alloc_default_domain(struct iommu_group *group,
-				      struct device *dev);
 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
 						 unsigned type);
 static int __iommu_attach_device(struct iommu_domain *domain,
 				 struct device *dev);
 static int __iommu_attach_group(struct iommu_domain *domain,
 				struct iommu_group *group);
+
+enum {
+	IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
+};
+
+static int __iommu_device_set_domain(struct iommu_group *group,
+				     struct device *dev,
+				     struct iommu_domain *new_domain,
+				     unsigned int flags);
+static int __iommu_group_set_domain_internal(struct iommu_group *group,
+					     struct iommu_domain *new_domain,
+					     unsigned int flags);
 static int __iommu_group_set_domain(struct iommu_group *group,
-				    struct iommu_domain *new_domain);
-static int iommu_create_device_direct_mappings(struct iommu_group *group,
+				    struct iommu_domain *new_domain)
+{
+	return __iommu_group_set_domain_internal(group, new_domain, 0);
+}
+static void __iommu_group_set_domain_nofail(struct iommu_group *group,
+					    struct iommu_domain *new_domain)
+{
+	WARN_ON(__iommu_group_set_domain_internal(
+		group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
+}
+
+static int iommu_setup_default_domain(struct iommu_group *group,
+				      int target_type);
+static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
 					       struct device *dev);
 static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
 static ssize_t iommu_group_store_type(struct iommu_group *group,
@@ -176,16 +202,16 @@ static int __init iommu_subsys_init(void)
 	if (!iommu_default_passthrough() && !iommu_dma_strict)
 		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;

-	pr_info("Default domain type: %s %s\n",
+	pr_info("Default domain type: %s%s\n",
 		iommu_domain_type_str(iommu_def_domain_type),
 		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
-			"(set via kernel command line)" : "");
+			" (set via kernel command line)" : "");

 	if (!iommu_default_passthrough())
-		pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
+		pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
 			iommu_dma_strict ? "strict" : "lazy",
 			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
-				"(set via kernel command line)" : "");
+				" (set via kernel command line)" : "");

 	nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
 	if (!nb)
@@ -343,6 +369,8 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list

 	dev->iommu->iommu_dev = iommu_dev;
 	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
+	if (ops->is_attach_deferred)
+		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);

 	group = iommu_group_get_for_dev(dev);
 	if (IS_ERR(group)) {
@@ -377,30 +405,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 	return ret;
 }

-static bool iommu_is_attach_deferred(struct device *dev)
-{
-	const struct iommu_ops *ops = dev_iommu_ops(dev);
-
-	if (ops->is_attach_deferred)
-		return ops->is_attach_deferred(dev);
-
-	return false;
-}
-
-static int iommu_group_do_dma_first_attach(struct device *dev, void *data)
-{
-	struct iommu_domain *domain = data;
-
-	lockdep_assert_held(&dev->iommu_group->mutex);
-
-	if (iommu_is_attach_deferred(dev)) {
-		dev->iommu->attach_deferred = 1;
-		return 0;
-	}
-
-	return __iommu_attach_device(domain, dev);
-}
-
 int iommu_probe_device(struct device *dev)
 {
 	const struct iommu_ops *ops;
@@ -417,29 +421,20 @@ int iommu_probe_device(struct device *dev)
 		goto err_release;
 	}

-	/*
-	 * Try to allocate a default domain - needs support from the
-	 * IOMMU driver. There are still some drivers which don't
-	 * support default domains, so the return value is not yet
-	 * checked.
-	 */
 	mutex_lock(&group->mutex);
-	iommu_alloc_default_domain(group, dev);

-	/*
-	 * If device joined an existing group which has been claimed, don't
-	 * attach the default domain.
-	 */
-	if (group->default_domain && !group->owner) {
-		ret = iommu_group_do_dma_first_attach(dev, group->default_domain);
-		if (ret) {
-			mutex_unlock(&group->mutex);
-			iommu_group_put(group);
-			goto err_release;
-		}
-	}
+	if (group->default_domain)
+		iommu_create_device_direct_mappings(group->default_domain, dev);

-	iommu_create_device_direct_mappings(group, dev);
+	if (group->domain) {
+		ret = __iommu_device_set_domain(group, dev, group->domain, 0);
+		if (ret)
+			goto err_unlock;
+	} else if (!group->default_domain) {
+		ret = iommu_setup_default_domain(group, 0);
+		if (ret)
+			goto err_unlock;
+	}

 	mutex_unlock(&group->mutex);
 	iommu_group_put(group);
@@ -450,6 +445,9 @@ int iommu_probe_device(struct device *dev)

 	return 0;

+err_unlock:
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
 err_release:
 	iommu_release_device(dev);

@@ -468,7 +466,7 @@ __iommu_group_remove_device(struct iommu_group *group, struct device *dev)
 	struct group_device *device;

 	lockdep_assert_held(&group->mutex);
-	list_for_each_entry(device, &group->devices, list) {
+	for_each_group_device(group, device) {
 		if (device->dev == dev) {
 			list_del(&device->list);
 			return device;
@@ -707,7 +705,7 @@ int iommu_get_group_resv_regions(struct iommu_group *group,
 	int ret = 0;

 	mutex_lock(&group->mutex);
-	list_for_each_entry(device, &group->devices, list) {
+	for_each_group_device(group, device) {
 		struct list_head dev_resv_regions;

 		/*
@@ -953,16 +951,15 @@ int iommu_group_set_name(struct iommu_group *group, const char *name)
 }
 EXPORT_SYMBOL_GPL(iommu_group_set_name);

-static int iommu_create_device_direct_mappings(struct iommu_group *group,
+static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
 					       struct device *dev)
 {
-	struct iommu_domain *domain = group->default_domain;
 	struct iommu_resv_region *entry;
 	struct list_head mappings;
 	unsigned long pg_size;
 	int ret = 0;

-	if (!domain || !iommu_is_dma_domain(domain))
+	if (!iommu_is_dma_domain(domain))
 		return 0;

 	BUG_ON(!domain->pgsize_bitmap);
@@ -1069,25 +1066,13 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev)

 	mutex_lock(&group->mutex);
 	list_add_tail(&device->list, &group->devices);
-	if (group->domain)
-		ret = iommu_group_do_dma_first_attach(dev, group->domain);
 	mutex_unlock(&group->mutex);
-	if (ret)
-		goto err_put_group;
-
 	trace_add_device_to_group(group->id, dev);

 	dev_info(dev, "Adding to iommu group %d\n", group->id);

 	return 0;

-err_put_group:
-	mutex_lock(&group->mutex);
-	list_del(&device->list);
-	mutex_unlock(&group->mutex);
-	dev->iommu_group = NULL;
-	kobject_put(group->devices_kobj);
-	sysfs_remove_link(group->devices_kobj, device->name);
 err_free_name:
 	kfree(device->name);
 err_remove_link:
@@ -1125,31 +1110,6 @@ void iommu_group_remove_device(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(iommu_group_remove_device);

-static int iommu_group_device_count(struct iommu_group *group)
-{
-	struct group_device *entry;
-	int ret = 0;
-
-	list_for_each_entry(entry, &group->devices, list)
-		ret++;
-
-	return ret;
-}
-
-static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
-				      int (*fn)(struct device *, void *))
-{
-	struct group_device *device;
-	int ret = 0;
-
-	list_for_each_entry(device, &group->devices, list) {
-		ret = fn(device->dev, data);
-		if (ret)
-			break;
-	}
-	return ret;
-}
-
 /**
 * iommu_group_for_each_dev - iterate over each device in the group
 * @group: the group
@@ -1164,10 +1124,15 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
 int iommu_group_for_each_dev(struct iommu_group *group, void *data,
 			     int (*fn)(struct device *, void *))
 {
-	int ret;
+	struct group_device *device;
+	int ret = 0;

 	mutex_lock(&group->mutex);
-	ret = __iommu_group_for_each_dev(group, data, fn);
+	for_each_group_device(group, device) {
+		ret = fn(device->dev, data);
+		if (ret)
+			break;
+	}
 	mutex_unlock(&group->mutex);

 	return ret;
@@ -1656,40 +1621,47 @@ static int iommu_get_def_domain_type(struct device *dev)
 	return 0;
 }

-static int iommu_group_alloc_default_domain(const struct bus_type *bus,
-					    struct iommu_group *group,
-					    unsigned int type)
+static struct iommu_domain *
+__iommu_group_alloc_default_domain(const struct bus_type *bus,
+				   struct iommu_group *group, int req_type)
 {
-	struct iommu_domain *dom;
-
-	dom = __iommu_domain_alloc(bus, type);
-	if (!dom && type != IOMMU_DOMAIN_DMA) {
-		dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
-		if (dom)
-			pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
-				type, group->name);
-	}
-
-	if (!dom)
-		return -ENOMEM;
-
-	group->default_domain = dom;
-	if (!group->domain)
-		group->domain = dom;
-	return 0;
+	if (group->default_domain && group->default_domain->type == req_type)
+		return group->default_domain;
+	return __iommu_domain_alloc(bus, req_type);
 }

-static int iommu_alloc_default_domain(struct iommu_group *group,
-				      struct device *dev)
+/*
+ * req_type of 0 means "auto" which means to select a domain based on
+ * iommu_def_domain_type or what the driver actually supports.
+ */
+static struct iommu_domain *
+iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
 {
-	unsigned int type;
+	const struct bus_type *bus =
+		list_first_entry(&group->devices, struct group_device, list)
+			->dev->bus;
+	struct iommu_domain *dom;

-	if (group->default_domain)
-		return 0;
+	lockdep_assert_held(&group->mutex);
+
+	if (req_type)
+		return __iommu_group_alloc_default_domain(bus, group, req_type);

-	type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
+	/* The driver gave no guidance on what type to use, try the default */
+	dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type);
+	if (dom)
+		return dom;

-	return iommu_group_alloc_default_domain(dev->bus, group, type);
+	/* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
+	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
+		return NULL;
+	dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA);
+	if (!dom)
+		return NULL;
+
+	pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
+		iommu_def_domain_type, group->name);
+	return dom;
 }

 /**
@@ -1774,87 +1746,48 @@ static int iommu_bus_notifier(struct notifier_block *nb,
 	return 0;
 }

-struct __group_domain_type {
-	struct device *dev;
-	unsigned int type;
-};
-
-static int probe_get_default_domain_type(struct device *dev, void *data)
-{
-	struct __group_domain_type *gtype = data;
-	unsigned int type = iommu_get_def_domain_type(dev);
-
-	if (type) {
-		if (gtype->type && gtype->type != type) {
-			dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
-				 iommu_domain_type_str(type),
-				 dev_name(gtype->dev),
-				 iommu_domain_type_str(gtype->type));
-			gtype->type = 0;
-		}
-
-		if (!gtype->dev) {
-			gtype->dev  = dev;
-			gtype->type = type;
-		}
-	}
-
-	return 0;
-}
-
-static void probe_alloc_default_domain(const struct bus_type *bus,
-				       struct iommu_group *group)
+/* A target_type of 0 will select the best domain type and cannot fail */
+static int iommu_get_default_domain_type(struct iommu_group *group,
+					 int target_type)
 {
-	struct __group_domain_type gtype;
+	int best_type = target_type;
+	struct group_device *gdev;
+	struct device *last_dev;

-	memset(&gtype, 0, sizeof(gtype));
-
-	/* Ask for default domain requirements of all devices in the group */
-	__iommu_group_for_each_dev(group, &gtype,
-				   probe_get_default_domain_type);
-
-	if (!gtype.type)
-		gtype.type = iommu_def_domain_type;
+	lockdep_assert_held(&group->mutex);

-	iommu_group_alloc_default_domain(bus, group, gtype.type);
+	for_each_group_device(group, gdev) {
+		unsigned int type = iommu_get_def_domain_type(gdev->dev);

-}
+		if (best_type && type && best_type != type) {
+			if (target_type) {
+				dev_err_ratelimited(
+					gdev->dev,
+					"Device cannot be in %s domain\n",
+					iommu_domain_type_str(target_type));
+				return -1;
+			}

-static int __iommu_group_dma_first_attach(struct iommu_group *group)
-{
-	return __iommu_group_for_each_dev(group, group->default_domain,
-					  iommu_group_do_dma_first_attach);
+			dev_warn(
+				gdev->dev,
+				"Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
+				iommu_domain_type_str(type), dev_name(last_dev),
+				iommu_domain_type_str(best_type));
+			return 0;
+		}
+		if (!best_type)
+			best_type = type;
+		last_dev = gdev->dev;
+	}
+	return best_type;
 }

-static int iommu_group_do_probe_finalize(struct device *dev, void *data)
+static void iommu_group_do_probe_finalize(struct device *dev)
 {
 	const struct iommu_ops *ops = dev_iommu_ops(dev);

 	if (ops->probe_finalize)
 		ops->probe_finalize(dev);
-
-	return 0;
-}
-
-static void __iommu_group_dma_finalize(struct iommu_group *group)
-{
-	__iommu_group_for_each_dev(group, group->default_domain,
-				   iommu_group_do_probe_finalize);
-}
-
-static int iommu_do_create_direct_mappings(struct device *dev, void *data)
-{
-	struct iommu_group *group = data;
-
-	iommu_create_device_direct_mappings(group, dev);
-
-	return 0;
-}
-
-static int iommu_group_create_direct_mappings(struct iommu_group *group)
-{
-	return __iommu_group_for_each_dev(group, group,
-					  iommu_do_create_direct_mappings);
 }

 int bus_iommu_probe(const struct bus_type *bus)
@@ -1873,32 +1806,31 @@ int bus_iommu_probe(const struct bus_type *bus)
 		return ret;

 	list_for_each_entry_safe(group, next, &group_list, entry) {
+		struct group_device *gdev;
+
 		mutex_lock(&group->mutex);

 		/* Remove item from the list */
 		list_del_init(&group->entry);

-		/* Try to allocate default domain */
-		probe_alloc_default_domain(bus, group);
-
-		if (!group->default_domain) {
+		ret = iommu_setup_default_domain(group, 0);
+		if (ret) {
 			mutex_unlock(&group->mutex);
-			continue;
+			return ret;
 		}
-
-		iommu_group_create_direct_mappings(group);
-
-		ret = __iommu_group_dma_first_attach(group);
-
 		mutex_unlock(&group->mutex);

-		if (ret)
-			break;
-
-		__iommu_group_dma_finalize(group);
+		/*
+		 * FIXME: Mis-locked because the ops->probe_finalize() callback
+		 * of some IOMMU drivers calls arm_iommu_attach_device(), which
+		 * in turn might call back into IOMMU core code, where it tries
+		 * to take group->mutex, resulting in a deadlock.
+		 */
+		for_each_group_device(group, gdev)
+			iommu_group_do_probe_finalize(gdev->dev);
 	}

-	return ret;
+	return 0;
 }

 bool iommu_present(const struct bus_type *bus)
@@ -1946,7 +1878,7 @@ bool iommu_group_has_isolated_msi(struct iommu_group *group)
 	bool ret = true;

 	mutex_lock(&group->mutex);
-	list_for_each_entry(group_dev, &group->devices, list)
+	for_each_group_device(group, group_dev)
 		ret &= msi_device_has_isolated_msi(group_dev->dev);
 	mutex_unlock(&group->mutex);
 	return ret;
@@ -1980,11 +1912,12 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
 						 unsigned type)
 {
 	struct iommu_domain *domain;
+	unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;

 	if (bus == NULL || bus->iommu_ops == NULL)
 		return NULL;

-	domain = bus->iommu_ops->domain_alloc(type);
+	domain = bus->iommu_ops->domain_alloc(alloc_type);
 	if (!domain)
 		return NULL;

@@ -2028,15 +1961,13 @@ EXPORT_SYMBOL_GPL(iommu_domain_free);
 static void __iommu_group_set_core_domain(struct iommu_group *group)
 {
 	struct iommu_domain *new_domain;
-	int ret;

 	if (group->owner)
 		new_domain = group->blocking_domain;
 	else
 		new_domain = group->default_domain;

-	ret = __iommu_group_set_domain(group, new_domain);
-	WARN(ret, "iommu driver failed to attach the default/blocking domain");
+	__iommu_group_set_domain_nofail(group, new_domain);
 }

 static int __iommu_attach_device(struct iommu_domain *domain,
@@ -2082,7 +2013,7 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
 	 */
 	mutex_lock(&group->mutex);
 	ret = -EINVAL;
-	if (iommu_group_device_count(group) != 1)
+	if (list_count_nodes(&group->devices) != 1)
 		goto out_unlock;

 	ret = __iommu_attach_group(domain, group);
@@ -2113,7 +2044,7 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)

 	mutex_lock(&group->mutex);
 	if (WARN_ON(domain != group->domain) ||
-	    WARN_ON(iommu_group_device_count(group) != 1))
+	    WARN_ON(list_count_nodes(&group->devices) != 1))
 		goto out_unlock;
 	__iommu_group_set_core_domain(group);

@@ -2149,52 +2080,14 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev)
 	return dev->iommu_group->default_domain;
 }

-/*
- * IOMMU groups are really the natural working unit of the IOMMU, but
- * the IOMMU API works on domains and devices.  Bridge that gap by
- * iterating over the devices in a group.  Ideally we'd have a single
- * device which represents the requestor ID of the group, but we also
- * allow IOMMU drivers to create policy defined minimum sets, where
- * the physical hardware may be able to distiguish members, but we
- * wish to group them at a higher level (ex. untrusted multi-function
- * PCI devices).  Thus we attach each device.
- */
-static int iommu_group_do_attach_device(struct device *dev, void *data)
-{
-	struct iommu_domain *domain = data;
-
-	return __iommu_attach_device(domain, dev);
-}
-
 static int __iommu_attach_group(struct iommu_domain *domain,
 				struct iommu_group *group)
 {
-	int ret;
-
 	if (group->domain && group->domain != group->default_domain &&
 	    group->domain != group->blocking_domain)
 		return -EBUSY;

-	ret = __iommu_group_for_each_dev(group, domain,
-					 iommu_group_do_attach_device);
-	if (ret == 0) {
-		group->domain = domain;
-	} else {
-		/*
-		 * To recover from the case when certain device within the
-		 * group fails to attach to the new domain, we need force
-		 * attaching all devices back to the old domain. The old
-		 * domain is compatible for all devices in the group,
-		 * hence the iommu driver should always return success.
-		 */
-		struct iommu_domain *old_domain = group->domain;
-
-		group->domain = NULL;
-		WARN(__iommu_group_set_domain(group, old_domain),
-		     "iommu driver failed to attach a compatible domain");
-	}
-
-	return ret;
+	return __iommu_group_set_domain(group, domain);
 }

 /**
@@ -2221,21 +2114,61 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
 }
 EXPORT_SYMBOL_GPL(iommu_attach_group);

-static int iommu_group_do_set_platform_dma(struct device *dev, void *data)
+static int __iommu_device_set_domain(struct iommu_group *group,
+				     struct device *dev,
+				     struct iommu_domain *new_domain,
+				     unsigned int flags)
 {
-	const struct iommu_ops *ops = dev_iommu_ops(dev);
+	int ret;

-	if (!WARN_ON(!ops->set_platform_dma_ops))
-		ops->set_platform_dma_ops(dev);
+	if (dev->iommu->attach_deferred) {
+		if (new_domain == group->default_domain)
+			return 0;
+		dev->iommu->attach_deferred = 0;
+	}

+	ret = __iommu_attach_device(new_domain, dev);
+	if (ret) {
+		/*
+		 * If we have a blocking domain then try to attach that in hopes
+		 * of avoiding a UAF. Modern drivers should implement blocking
+		 * domains as global statics that cannot fail.
+		 */
+		if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
+		    group->blocking_domain &&
+		    group->blocking_domain != new_domain)
+			__iommu_attach_device(group->blocking_domain, dev);
+		return ret;
+	}
 	return 0;
 }

-static int __iommu_group_set_domain(struct iommu_group *group,
-				    struct iommu_domain *new_domain)
+/*
+ * If 0 is returned the group's domain is new_domain. If an error is returned
+ * then the group's domain will be set back to the existing domain unless
+ * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's
+ * domains is left inconsistent. This is a driver bug to fail attach with a
+ * previously good domain. We try to avoid a kernel UAF because of this.
+ *
+ * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
+ * API works on domains and devices.  Bridge that gap by iterating over the
+ * devices in a group.  Ideally we'd have a single device which represents the
+ * requestor ID of the group, but we also allow IOMMU drivers to create policy
+ * defined minimum sets, where the physical hardware may be able to distinguish
+ * members, but we wish to group them at a higher level (ex. untrusted
+ * multi-function PCI devices).  Thus we attach each device.
+ */
+static int __iommu_group_set_domain_internal(struct iommu_group *group,
+					     struct iommu_domain *new_domain,
+					     unsigned int flags)
 {
+	struct group_device *last_gdev;
+	struct group_device *gdev;
+	int result;
 	int ret;

+	lockdep_assert_held(&group->mutex);
+
 	if (group->domain == new_domain)
 		return 0;

@@ -2245,8 +2178,12 @@ static int __iommu_group_set_domain(struct iommu_group *group,
 	 * platform specific behavior.
 	 */
 	if (!new_domain) {
-		__iommu_group_for_each_dev(group, NULL,
-					   iommu_group_do_set_platform_dma);
+		for_each_group_device(group, gdev) {
+			const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
+
+			if (!WARN_ON(!ops->set_platform_dma_ops))
+				ops->set_platform_dma_ops(gdev->dev);
+		}
 		group->domain = NULL;
 		return 0;
 	}
@@ -2256,16 +2193,52 @@ static int __iommu_group_set_domain(struct iommu_group *group,
 	 * domain. This switch does not have to be atomic and DMA can be
 	 * discarded during the transition. DMA must only be able to access
 	 * either new_domain or group->domain, never something else.
-	 *
-	 * Note that this is called in error unwind paths, attaching to a
-	 * domain that has already been attached cannot fail.
 	 */
-	ret = __iommu_group_for_each_dev(group, new_domain,
-					 iommu_group_do_attach_device);
-	if (ret)
-		return ret;
+	result = 0;
+	for_each_group_device(group, gdev) {
+		ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
+						flags);
+		if (ret) {
+			result = ret;
+			/*
+			 * Keep trying the other devices in the group. If a
+			 * driver fails attach to an otherwise good domain, and
+			 * does not support blocking domains, it should at least
+			 * drop its reference on the current domain so we don't
+			 * UAF.
+			 */
+			if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
+				continue;
+			goto err_revert;
+		}
+	}
 	group->domain = new_domain;
-	return 0;
+	return result;
+
+err_revert:
+	/*
+	 * This is called in error unwind paths. A well behaved driver should
+	 * always allow us to attach to a domain that was already attached.
+	 */
+	last_gdev = gdev;
+	for_each_group_device(group, gdev) {
+		const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
+
+		/*
+		 * If set_platform_dma_ops is not present a NULL domain can
+		 * happen only for first probe, in which case we leave
+		 * group->domain as NULL and let release clean everything up.
+		 */
+		if (group->domain)
+			WARN_ON(__iommu_device_set_domain(
+				group, gdev->dev, group->domain,
+				IOMMU_SET_DOMAIN_MUST_SUCCEED));
+		else if (ops->set_platform_dma_ops)
+			ops->set_platform_dma_ops(gdev->dev);
+		if (gdev == last_gdev)
+			break;
+	}
+	return ret;
 }

 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
@@ -2846,78 +2819,112 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
 }
 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);

-/*
- * Changes the default domain of an iommu group
- *
- * @group: The group for which the default domain should be changed
- * @dev: The first device in the group
- * @type: The type of the new default domain that gets associated with the group
- *
- * Returns 0 on success and error code on failure
+/**
+ * iommu_setup_default_domain - Set the default_domain for the group
+ * @group: Group to change
+ * @target_type: Domain type to set as the default_domain
 *
- * Note:
- * 1. Presently, this function is called only when user requests to change the
- *    group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type
- *    Please take a closer look if intended to use for other purposes.
+ * Allocate a default domain and set it as the current domain on the group. If
+ * the group already has a default domain it will be changed to the target_type.
+ * When target_type is 0 the default domain is selected based on driver and
+ * system preferences.
 */
-static int iommu_change_dev_def_domain(struct iommu_group *group,
-				       struct device *dev, int type)
+static int iommu_setup_default_domain(struct iommu_group *group,
+				      int target_type)
 {
-	struct __group_domain_type gtype = {NULL, 0};
-	struct iommu_domain *prev_dom;
+	struct iommu_domain *old_dom = group->default_domain;
+	struct group_device *gdev;
+	struct iommu_domain *dom;
+	bool direct_failed;
+	int req_type;
 	int ret;

 	lockdep_assert_held(&group->mutex);

-	prev_dom = group->default_domain;
-	__iommu_group_for_each_dev(group, &gtype,
-				   probe_get_default_domain_type);
-	if (!type) {
-		/*
-		 * If the user hasn't requested any specific type of domain and
-		 * if the device supports both the domains, then default to the
-		 * domain the device was booted with
-		 */
-		type = gtype.type ? : iommu_def_domain_type;
-	} else if (gtype.type && type != gtype.type) {
-		dev_err_ratelimited(dev, "Device cannot be in %s domain\n",
-				    iommu_domain_type_str(type));
+	req_type = iommu_get_default_domain_type(group, target_type);
+	if (req_type < 0)
 		return -EINVAL;
-	}

 	/*
-	 * Switch to a new domain only if the requested domain type is different
-	 * from the existing default domain type
+	 * There are still some drivers which don't support default domains, so
+	 * we ignore the failure and leave group->default_domain NULL.
+	 *
+	 * We assume that the iommu driver starts up the device in
+	 * 'set_platform_dma_ops' mode if it does not support default domains.
 	 */
-	if (prev_dom->type == type)
+	dom = iommu_group_alloc_default_domain(group, req_type);
+	if (!dom) {
+		/* Once in default_domain mode we never leave */
+		if (group->default_domain)
+			return -ENODEV;
+		group->default_domain = NULL;
 		return 0;
+	}

-	group->default_domain = NULL;
-	group->domain = NULL;
-
-	/* Sets group->default_domain to the newly allocated domain */
-	ret = iommu_group_alloc_default_domain(dev->bus, group, type);
-	if (ret)
-		goto restore_old_domain;
-
-	ret = iommu_group_create_direct_mappings(group);
-	if (ret)
-		goto free_new_domain;
-
-	ret = __iommu_attach_group(group->default_domain, group);
-	if (ret)
-		goto free_new_domain;
+	if (group->default_domain == dom)
+		return 0;

-	iommu_domain_free(prev_dom);
+	/*
+	 * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be
+	 * mapped before their device is attached, in order to guarantee
+	 * continuity with any FW activity
+	 */
+	direct_failed = false;
+	for_each_group_device(group, gdev) {
+		if (iommu_create_device_direct_mappings(dom, gdev->dev)) {
+			direct_failed = true;
+			dev_warn_once(
+				gdev->dev->iommu->iommu_dev->dev,
+				"IOMMU driver was not able to establish FW requested direct mapping.");
+		}
+	}

-	return 0;
+	/* We must set default_domain early for __iommu_device_set_domain */
+	group->default_domain = dom;
+	if (!group->domain) {
+		/*
+		 * Drivers are not allowed to fail the first domain attach.
+		 * The only way to recover from this is to fail attaching the
+		 * iommu driver and call ops->release_device. Put the domain
+		 * in group->default_domain so it is freed after.
+		 */
+		ret = __iommu_group_set_domain_internal(
+			group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
+		if (WARN_ON(ret))
+			goto out_free;
+	} else {
+		ret = __iommu_group_set_domain(group, dom);
+		if (ret) {
+			iommu_domain_free(dom);
+			group->default_domain = old_dom;
+			return ret;
+		}
+	}

-free_new_domain:
-	iommu_domain_free(group->default_domain);
-restore_old_domain:
-	group->default_domain = prev_dom;
-	group->domain = prev_dom;
+	/*
+	 * Drivers are supposed to allow mappings to be installed in a domain
+	 * before device attachment, but some don't. Hack around this defect by
+	 * trying again after attaching. If this happens it means the device
+	 * will not continuously have the IOMMU_RESV_DIRECT map.
+	 */
+	if (direct_failed) {
+		for_each_group_device(group, gdev) {
+			ret = iommu_create_device_direct_mappings(dom, gdev->dev);
+			if (ret)
+				goto err_restore;
+		}
+	}

+err_restore:
+	if (ret && old_dom) {	/* success falls through here with ret == 0 */
+		__iommu_group_set_domain_internal(group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
+		iommu_domain_free(dom);
+		group->default_domain = old_dom;
+		old_dom = NULL;
+	}
+out_free:
+	if (old_dom)
+		iommu_domain_free(old_dom);
 	return ret;
 }

@@ -2933,8 +2940,7 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
 static ssize_t iommu_group_store_type(struct iommu_group *group,
 				      const char *buf, size_t count)
 {
-	struct group_device *grp_dev;
-	struct device *dev;
+	struct group_device *gdev;
 	int ret, req_type;

 	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
@@ -2959,23 +2965,23 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
 	if (req_type == IOMMU_DOMAIN_DMA_FQ &&
 	    group->default_domain->type == IOMMU_DOMAIN_DMA) {
 		ret = iommu_dma_init_fq(group->default_domain);
-		if (!ret)
-			group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
-		mutex_unlock(&group->mutex);
+		if (ret)
+			goto out_unlock;

-		return ret ?: count;
+		group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
+		ret = count;
+		goto out_unlock;
 	}

 	/* Otherwise, ensure that device exists and no driver is bound. */
 	if (list_empty(&group->devices) || group->owner_cnt) {
-		mutex_unlock(&group->mutex);
-		return -EPERM;
+		ret = -EPERM;
+		goto out_unlock;
 	}

-	grp_dev = list_first_entry(&group->devices, struct group_device, list);
-	dev = grp_dev->dev;
-
-	ret = iommu_change_dev_def_domain(group, dev, req_type);
+	ret = iommu_setup_default_domain(group, req_type);
+	if (ret)
+		goto out_unlock;

 	/*
 	 * Release the mutex here because ops->probe_finalize() call-back of
@@ -2986,9 +2992,12 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
 	mutex_unlock(&group->mutex);

 	/* Make sure dma_ops is appropriatley set */
-	if (!ret)
-		__iommu_group_dma_finalize(group);
+	for_each_group_device(group, gdev)
+		iommu_group_do_probe_finalize(gdev->dev);
+	return count;

+out_unlock:
+	mutex_unlock(&group->mutex);
 	return ret ?: count;
 }

@@ -3182,16 +3191,13 @@ EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);

 static void __iommu_release_dma_ownership(struct iommu_group *group)
 {
-	int ret;
-
 	if (WARN_ON(!group->owner_cnt || !group->owner ||
 		    !xa_empty(&group->pasid_array)))
 		return;

 	group->owner_cnt = 0;
 	group->owner = NULL;
-	ret = __iommu_group_set_domain(group, group->default_domain);
-	WARN(ret, "iommu driver failed to attach the default domain");
+	__iommu_group_set_domain_nofail(group, group->default_domain);
 }

 /**
@@ -3253,7 +3259,7 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
 	struct group_device *device;
 	int ret = 0;

-	list_for_each_entry(device, &group->devices, list) {
+	for_each_group_device(group, device) {
 		ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
 		if (ret)
 			break;
@@ -3268,7 +3274,7 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
 	struct group_device *device;
 	const struct iommu_ops *ops;

-	list_for_each_entry(device, &group->devices, list) {
+	for_each_group_device(group, device) {
 		ops = dev_iommu_ops(device->dev);
 		ops->remove_dev_pasid(device->dev, pasid);
 	}

--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -647,7 +647,13 @@ struct iova_rcache {

 static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
 {
-	return kzalloc(sizeof(struct iova_magazine), flags);
+	struct iova_magazine *mag;
+
+	mag = kmalloc(sizeof(*mag), flags);
+	if (mag)
+		mag->size = 0;
+
+	return mag;
 }

 static void iova_magazine_free(struct iova_magazine *mag)

--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -788,6 +788,29 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	return 0;
 }

+static void viommu_detach_dev(struct viommu_endpoint *vdev)
+{
+	int i;
+	struct virtio_iommu_req_detach req;
+	struct viommu_domain *vdomain = vdev->vdomain;
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(vdev->dev);
+
+	if (!vdomain)
+		return;
+
+	req = (struct virtio_iommu_req_detach) {
+		.head.type	= VIRTIO_IOMMU_T_DETACH,
+		.domain		= cpu_to_le32(vdomain->id),
+	};
+
+	for (i = 0; i < fwspec->num_ids; i++) {
+		req.endpoint = cpu_to_le32(fwspec->ids[i]);
+		WARN_ON(viommu_send_req_sync(vdev->viommu, &req, sizeof(req)));
+	}
+	vdomain->nr_endpoints--;
+	vdev->vdomain = NULL;
+}
+
 static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova,
 			    phys_addr_t paddr, size_t pgsize, size_t pgcount,
 			    int prot, gfp_t gfp, size_t *mapped)
@@ -810,25 +833,26 @@ static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova,
 	if (ret)
 		return ret;

-	map = (struct virtio_iommu_req_map) {
-		.head.type	= VIRTIO_IOMMU_T_MAP,
-		.domain		= cpu_to_le32(vdomain->id),
-		.virt_start	= cpu_to_le64(iova),
-		.phys_start	= cpu_to_le64(paddr),
-		.virt_end	= cpu_to_le64(end),
-		.flags		= cpu_to_le32(flags),
-	};
-
-	if (!vdomain->nr_endpoints)
-		return 0;
+	if (vdomain->nr_endpoints) {
+		map = (struct virtio_iommu_req_map) {
+			.head.type	= VIRTIO_IOMMU_T_MAP,
+			.domain		= cpu_to_le32(vdomain->id),
+			.virt_start	= cpu_to_le64(iova),
+			.phys_start	= cpu_to_le64(paddr),
+			.virt_end	= cpu_to_le64(end),
+			.flags		= cpu_to_le32(flags),
+		};

-	ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
-	if (ret)
-		viommu_del_mappings(vdomain, iova, end);
-	else if (mapped)
+		ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
+		if (ret) {
+			viommu_del_mappings(vdomain, iova, end);
+			return ret;
+		}
+	}
+	if (mapped)
 		*mapped = size;

-	return ret;
+	return 0;
 }

 static size_t viommu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
@@ -990,6 +1014,7 @@ static void viommu_release_device(struct device *dev)
 {
 	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);

+	viommu_detach_dev(vdev);
 	iommu_put_resv_regions(dev, &vdev->resv_regions);
 	kfree(vdev);
 }

--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -65,6 +65,7 @@ struct iommu_domain_geometry {

 #define __IOMMU_DOMAIN_SVA	(1U << 4)  /* Shared process address space */

+#define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ
 /*
 * This are the possible domain-types
 *
@@ -127,6 +128,11 @@ enum iommu_cap {
 	 * this device.
 	 */
 	IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
+	/*
+	 * IOMMU driver does not issue TLB maintenance during .unmap, so can
+	 * usefully support the non-strict DMA flush queue.
+	 */
+	IOMMU_CAP_DEFERRED_FLUSH,
 };

 /* These are the possible reserved region types */