Commit 0aafc8ae authored by Peter Xu, committed by Joerg Roedel

Revert "iommu/vt-d: Fix lock inversion between iommu->lock and device_domain_lock"

This reverts commit 7560cc3c.

With 5.2.0-rc5 I can easily trigger this with lockdep and iommu=pt:

    ======================================================
    WARNING: possible circular locking dependency detected
    5.2.0-rc5 #78 Not tainted
    ------------------------------------------------------
    swapper/0/1 is trying to acquire lock:
    00000000ea2b3beb (&(&iommu->lock)->rlock){+.+.}, at: domain_context_mapping_one+0xa5/0x4e0
    but task is already holding lock:
    00000000a681907b (device_domain_lock){....}, at: domain_context_mapping_one+0x8d/0x4e0
    which lock already depends on the new lock.
    the existing dependency chain (in reverse order) is:
    -> #1 (device_domain_lock){....}:
           _raw_spin_lock_irqsave+0x3c/0x50
           dmar_insert_one_dev_info+0xbb/0x510
           domain_add_dev_info+0x50/0x90
           dev_prepare_static_identity_mapping+0x30/0x68
           intel_iommu_init+0xddd/0x1422
           pci_iommu_init+0x16/0x3f
           do_one_initcall+0x5d/0x2b4
           kernel_init_freeable+0x218/0x2c1
           kernel_init+0xa/0x100
           ret_from_fork+0x3a/0x50
    -> #0 (&(&iommu->lock)->rlock){+.+.}:
           lock_acquire+0x9e/0x170
           _raw_spin_lock+0x25/0x30
           domain_context_mapping_one+0xa5/0x4e0
           pci_for_each_dma_alias+0x30/0x140
           dmar_insert_one_dev_info+0x3b2/0x510
           domain_add_dev_info+0x50/0x90
           dev_prepare_static_identity_mapping+0x30/0x68
           intel_iommu_init+0xddd/0x1422
           pci_iommu_init+0x16/0x3f
           do_one_initcall+0x5d/0x2b4
           kernel_init_freeable+0x218/0x2c1
           kernel_init+0xa/0x100
           ret_from_fork+0x3a/0x50

    other info that might help us debug this:
     Possible unsafe locking scenario:
           CPU0                    CPU1
           ----                    ----
      lock(device_domain_lock);
                                   lock(&(&iommu->lock)->rlock);
                                   lock(device_domain_lock);
      lock(&(&iommu->lock)->rlock);

     *** DEADLOCK ***
    2 locks held by swapper/0/1:
     #0: 00000000033eb13d (dmar_global_lock){++++}, at: intel_iommu_init+0x1e0/0x1422
     #1: 00000000a681907b (device_domain_lock){....}, at: domain_context_mapping_one+0x8d/0x4e0

    stack backtrace:
    CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.2.0-rc5 #78
    Hardware name: LENOVO 20KGS35G01/20KGS35G01, BIOS N23ET50W (1.25 ) 06/25/2018
    Call Trace:
     dump_stack+0x85/0xc0
     print_circular_bug.cold.57+0x15c/0x195
     __lock_acquire+0x152a/0x1710
     lock_acquire+0x9e/0x170
     ? domain_context_mapping_one+0xa5/0x4e0
     _raw_spin_lock+0x25/0x30
     ? domain_context_mapping_one+0xa5/0x4e0
     domain_context_mapping_one+0xa5/0x4e0
     ? domain_context_mapping_one+0x4e0/0x4e0
     pci_for_each_dma_alias+0x30/0x140
     dmar_insert_one_dev_info+0x3b2/0x510
     domain_add_dev_info+0x50/0x90
     dev_prepare_static_identity_mapping+0x30/0x68
     intel_iommu_init+0xddd/0x1422
     ? printk+0x58/0x6f
     ? lockdep_hardirqs_on+0xf0/0x180
     ? do_early_param+0x8e/0x8e
     ? e820__memblock_setup+0x63/0x63
     pci_iommu_init+0x16/0x3f
     do_one_initcall+0x5d/0x2b4
     ? do_early_param+0x8e/0x8e
     ? rcu_read_lock_sched_held+0x55/0x60
     ? do_early_param+0x8e/0x8e
     kernel_init_freeable+0x218/0x2c1
     ? rest_init+0x230/0x230
     kernel_init+0xa/0x100
     ret_from_fork+0x3a/0x50

domain_context_mapping_one() takes device_domain_lock first and then the
iommu lock, while dmar_insert_one_dev_info() does the reverse.
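
For illustration only (this is not the kernel code; the lock and function
names below are hypothetical stand-ins, and pthread mutexes are used in
place of the spinlocks): the pattern lockdep is reporting is the classic
AB-BA inversion, shown here as a self-contained userspace sketch.

    #include <pthread.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for device_domain_lock (A) and iommu->lock (B). */
    static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

    /* Mirrors the domain_context_mapping_one() ordering: A, then B. */
    static void *path_one(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock_a);
        pthread_mutex_lock(&lock_b);   /* may block while A is held */
        pthread_mutex_unlock(&lock_b);
        pthread_mutex_unlock(&lock_a);
        return NULL;
    }

    /* Mirrors the dmar_insert_one_dev_info() ordering after 7560cc3c: B, then A. */
    static void *path_two(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock_b);
        pthread_mutex_lock(&lock_a);   /* may block while B is held -> cycle */
        pthread_mutex_unlock(&lock_a);
        pthread_mutex_unlock(&lock_b);
        return NULL;
    }

    int main(void)
    {
        pthread_t t1, t2;

        pthread_create(&t1, NULL, path_one, NULL);
        pthread_create(&t2, NULL, path_two, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        /* Depending on timing this either finishes or deadlocks, with each
         * thread waiting for the lock the other one holds. */
        printf("done\n");
        return 0;
    }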

That inversion was introduced by commit:

7560cc3c ("iommu/vt-d: Fix lock inversion between iommu->lock and
              device_domain_lock", 2019-05-27)

So far I still cannot figure out how the previous deadlock was triggered
(I cannot find the iommu lock being taken before the call to
iommu_flush_dev_iotlb()). However, I am fairly sure that change is at the
very least incomplete, because it does not fix all the places, so we still
take the locks in different orders. Reverting it, on the other hand, gives
us a clean and consistent rule: always take device_domain_lock first, then
the iommu lock.
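
In terms of the sketch above (same hypothetical lock names), the revert
restores that rule by making the second path nest the iommu-lock stand-in
inside the device_domain_lock stand-in, matching the first path:

    /* Replacement for path_two in the sketch above: both paths now take
     * A (device_domain_lock stand-in) before B (iommu->lock stand-in),
     * so there is a single consistent ordering and no cycle can form. */
    static void *path_two_fixed(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock_a);
        pthread_mutex_lock(&lock_b);   /* nested inside A, same as path_one */
        pthread_mutex_unlock(&lock_b);
        pthread_mutex_unlock(&lock_a);
        return NULL;
    }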

We can continue to try to find the real culprit mentioned in
7560cc3c, but for now I think we should revert it to fix the current
breakage.

CC: Joerg Roedel <joro@8bytes.org>
CC: Lu Baolu <baolu.lu@linux.intel.com>
CC: dave.jiang@intel.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 9e0babf2
@@ -2504,7 +2504,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 		}
 	}
 
-	spin_lock(&iommu->lock);
 	spin_lock_irqsave(&device_domain_lock, flags);
 	if (dev)
 		found = find_domain(dev);
@@ -2520,16 +2519,17 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 
 	if (found) {
 		spin_unlock_irqrestore(&device_domain_lock, flags);
-		spin_unlock(&iommu->lock);
 		free_devinfo_mem(info);
 		/* Caller must free the original domain */
 		return found;
 	}
 
+	spin_lock(&iommu->lock);
 	ret = domain_attach_iommu(domain, iommu);
+	spin_unlock(&iommu->lock);
+
 	if (ret) {
 		spin_unlock_irqrestore(&device_domain_lock, flags);
-		spin_unlock(&iommu->lock);
 		free_devinfo_mem(info);
 		return NULL;
 	}
@@ -2539,7 +2539,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	if (dev)
 		dev->archdata.iommu = info;
 	spin_unlock_irqrestore(&device_domain_lock, flags);
-	spin_unlock(&iommu->lock);
 
 	/* PASID table is mandatory for a PCI device in scalable mode. */
 	if (dev && dev_is_pci(dev) && sm_supported(iommu)) {