Commit 918eb5c8 authored by Nate Watterson, committed by Will Deacon

iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV

NVIDIA's Tegra241 SoC has CMDQ-Virtualization (CMDQV) hardware, extending
the standard ARM SMMU v3 IP to support multiple VCMDQs with virtualization
capabilities. In terms of command queue, they are very much like a standard
SMMU CMDQ (or ECMDQs), but only support CS_NONE in the CS field of CMD_SYNC.

Add a new tegra241-cmdqv driver, and insert its structure pointer into the
existing arm_smmu_device, and then add related function calls in the SMMUv3
driver to interact with the CMDQV driver.

In the CMDQV driver, add a minimal part for the in-kernel support: reserve
VINTF0 for in-kernel use, and assign some of the VCMDQs to the VINTF0, and
select one VCMDQ based on the current CPU ID to execute supported commands.
This multi-queue design for in-kernel use gives some limited improvements:
up to 20% reduction of invalidation time was measured by a multi-threaded
DMA unmap benchmark, compared to a single queue.

The other part of the CMDQV driver will be user-space support that lets a
hypervisor running on the host OS talk to the driver for virtualization
use cases, allowing VMs to use VCMDQs without trapping, i.e. no VM Exits.
This is designed based on IOMMUFD, and its RFC series is also under review.
It will provide a guest OS a bigger improvement: 70% to 90% reductions of
TLB invalidation time were measured by DMA unmap tests running in a guest,
compared to nested SMMU CMDQ (with trapping).

As the initial version, the CMDQV driver only supports ACPI configurations.
Signed-off-by: Nate Watterson <nwatterson@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Co-developed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/dce50490b2c10b7254fb36aa73ed7ffd812b283a.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
parent 6de80d61
...@@ -22474,6 +22474,7 @@ M: Thierry Reding <thierry.reding@gmail.com> ...@@ -22474,6 +22474,7 @@ M: Thierry Reding <thierry.reding@gmail.com>
R: Krishna Reddy <vdumpa@nvidia.com> R: Krishna Reddy <vdumpa@nvidia.com>
L: linux-tegra@vger.kernel.org L: linux-tegra@vger.kernel.org
S: Supported S: Supported
F: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
F: drivers/iommu/tegra* F: drivers/iommu/tegra*
......
...@@ -424,6 +424,17 @@ config ARM_SMMU_V3_KUNIT_TEST ...@@ -424,6 +424,17 @@ config ARM_SMMU_V3_KUNIT_TEST
Enable this option to unit-test arm-smmu-v3 driver functions. Enable this option to unit-test arm-smmu-v3 driver functions.
If unsure, say N. If unsure, say N.
config TEGRA241_CMDQV
bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
depends on ACPI
help
Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
support, except with virtualization capabilities.
Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same
CMDQ-V extension.
endif endif
config S390_IOMMU config S390_IOMMU
......
...@@ -2,5 +2,6 @@ ...@@ -2,5 +2,6 @@
obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
arm_smmu_v3-y := arm-smmu-v3.o arm_smmu_v3-y := arm-smmu-v3.o
arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
arm_smmu_v3-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o
obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o
...@@ -4354,6 +4354,31 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) ...@@ -4354,6 +4354,31 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
} }
#ifdef CONFIG_ACPI #ifdef CONFIG_ACPI
#ifdef CONFIG_TEGRA241_CMDQV
/*
 * Look up the Tegra241 CMDQV companion device for an SMMU in the ACPI
 * namespace.
 *
 * @node: IORT node of the SMMU; its identifier, printed as a decimal string,
 *        is matched against the _UID of an NVDA200C device.
 * @smmu: SMMU instance to update. On a match, impl_dev is pointed at the
 *        companion device and ARM_SMMU_OPT_TEGRA241_CMDQV is set in options.
 *        Without a match (or if the UID string cannot be allocated) it is
 *        left untouched.
 */
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
						struct arm_smmu_device *smmu)
{
	const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
	struct acpi_device *adev;

	/*
	 * A NULL uid tells acpi_dev_get_first_match_dev() not to compare _UID
	 * at all, so any NVDA200C device would match -- possibly one that
	 * belongs to a different SMMU. Treat allocation failure as "no CMDQV".
	 */
	if (!uid)
		return;

	/* Look for an NVDA200C node whose _UID matches the SMMU node ID */
	adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
	if (adev) {
		/* Tegra241 CMDQV driver is responsible for put_device() */
		smmu->impl_dev = &adev->dev;
		smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
		dev_info(smmu->dev, "found companion CMDQV device: %s\n",
			 dev_name(smmu->impl_dev));
	}
	kfree(uid);
}
#else
/*
 * No-op variant built when CONFIG_TEGRA241_CMDQV is not enabled: neither
 * @node nor @smmu is touched, so no companion CMDQV device is ever recorded.
 */
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
struct arm_smmu_device *smmu)
{
}
#endif
static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
struct arm_smmu_device *smmu) struct arm_smmu_device *smmu)
{ {
...@@ -4368,6 +4393,11 @@ static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, ...@@ -4368,6 +4393,11 @@ static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
break; break;
case ACPI_IORT_SMMU_V3_GENERIC: case ACPI_IORT_SMMU_V3_GENERIC:
/*
* Tegra241 implementation stores its SMMU options and impl_dev
* in DSDT. Thus, go through the ACPI tables unconditionally.
*/
acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
break; break;
} }
...@@ -4497,7 +4527,8 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) ...@@ -4497,7 +4527,8 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
int ret; int ret;
/* Add impl probe */ if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
new_smmu = tegra241_cmdqv_probe(smmu);
if (new_smmu == ERR_PTR(-ENODEV)) if (new_smmu == ERR_PTR(-ENODEV))
return smmu; return smmu;
......
...@@ -641,6 +641,7 @@ struct arm_smmu_impl_ops { ...@@ -641,6 +641,7 @@ struct arm_smmu_impl_ops {
/* An SMMUv3 instance */ /* An SMMUv3 instance */
struct arm_smmu_device { struct arm_smmu_device {
struct device *dev; struct device *dev;
struct device *impl_dev;
const struct arm_smmu_impl_ops *impl_ops; const struct arm_smmu_impl_ops *impl_ops;
void __iomem *base; void __iomem *base;
...@@ -882,4 +883,14 @@ static inline void arm_smmu_sva_notifier_synchronize(void) {} ...@@ -882,4 +883,14 @@ static inline void arm_smmu_sva_notifier_synchronize(void) {}
#define arm_smmu_sva_domain_alloc NULL #define arm_smmu_sva_domain_alloc NULL
#endif /* CONFIG_ARM_SMMU_V3_SVA */ #endif /* CONFIG_ARM_SMMU_V3_SVA */
/*
 * Tegra241 CMDQV probe entry point, called from the SMMUv3 driver's
 * implementation probe with the generic @smmu instance.
 */
#ifdef CONFIG_TEGRA241_CMDQV
struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu);
#else /* CONFIG_TEGRA241_CMDQV */
/*
 * Fallback when the CMDQV driver is not built: always reports
 * ERR_PTR(-ENODEV), which the implementation probe compares against to keep
 * using the original @smmu.
 */
static inline struct arm_smmu_device *
tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
{
return ERR_PTR(-ENODEV);
}
#endif /* CONFIG_TEGRA241_CMDQV */
#endif /* _ARM_SMMU_V3_H */ #endif /* _ARM_SMMU_V3_H */
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment