Commit d9a0cd51 authored by Alex Williamson

Merge branch 'v5.16/vfio/hch-cleanup-vfio-iommu_group-creation-v6' into v5.16/vfio/next

parents 02d5e016 3f901389
...@@ -351,7 +351,7 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev) ...@@ -351,7 +351,7 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev)
list_add(&matrix_mdev->node, &matrix_dev->mdev_list); list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
mutex_unlock(&matrix_dev->lock); mutex_unlock(&matrix_dev->lock);
ret = vfio_register_group_dev(&matrix_mdev->vdev); ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
if (ret) if (ret)
goto err_list; goto err_list;
dev_set_drvdata(&mdev->dev, matrix_mdev); dev_set_drvdata(&mdev->dev, matrix_mdev);
......
...@@ -505,22 +505,13 @@ static void vfio_fsl_uninit_device(struct vfio_fsl_mc_device *vdev) ...@@ -505,22 +505,13 @@ static void vfio_fsl_uninit_device(struct vfio_fsl_mc_device *vdev)
static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
{ {
struct iommu_group *group;
struct vfio_fsl_mc_device *vdev; struct vfio_fsl_mc_device *vdev;
struct device *dev = &mc_dev->dev; struct device *dev = &mc_dev->dev;
int ret; int ret;
group = vfio_iommu_group_get(dev);
if (!group) {
dev_err(dev, "VFIO_FSL_MC: No IOMMU group\n");
return -EINVAL;
}
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev) { if (!vdev)
ret = -ENOMEM; return -ENOMEM;
goto out_group_put;
}
vfio_init_group_dev(&vdev->vdev, dev, &vfio_fsl_mc_ops); vfio_init_group_dev(&vdev->vdev, dev, &vfio_fsl_mc_ops);
vdev->mc_dev = mc_dev; vdev->mc_dev = mc_dev;
...@@ -556,8 +547,6 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) ...@@ -556,8 +547,6 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
out_uninit: out_uninit:
vfio_uninit_group_dev(&vdev->vdev); vfio_uninit_group_dev(&vdev->vdev);
kfree(vdev); kfree(vdev);
out_group_put:
vfio_iommu_group_put(group, dev);
return ret; return ret;
} }
...@@ -574,8 +563,6 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) ...@@ -574,8 +563,6 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
vfio_uninit_group_dev(&vdev->vdev); vfio_uninit_group_dev(&vdev->vdev);
kfree(vdev); kfree(vdev);
vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
return 0; return 0;
} }
......
...@@ -13,60 +13,23 @@ ...@@ -13,60 +13,23 @@
#include "mdev_private.h" #include "mdev_private.h"
/*
 * Give the mdev its own freshly allocated iommu_group.
 *
 * Returns PTR_ERR() of the allocation on failure, otherwise the result of
 * iommu_group_add_device().  The local reference taken by the allocation is
 * always dropped before returning.
 */
static int mdev_attach_iommu(struct mdev_device *mdev)
{
	struct iommu_group *grp = iommu_group_alloc();
	int err;

	if (IS_ERR(grp))
		return PTR_ERR(grp);

	err = iommu_group_add_device(grp, &mdev->dev);
	if (err == 0) {
		dev_info(&mdev->dev, "MDEV: group_id = %d\n",
			 iommu_group_id(grp));
	}

	/* Drop our allocation reference whether or not the add succeeded. */
	iommu_group_put(grp);

	return err;
}
/*
 * Remove the mdev's struct device from its iommu_group, then log the detach.
 */
static void mdev_detach_iommu(struct mdev_device *mdev)
{
iommu_group_remove_device(&mdev->dev);
dev_info(&mdev->dev, "MDEV: detaching iommu\n");
}
static int mdev_probe(struct device *dev) static int mdev_probe(struct device *dev)
{ {
struct mdev_driver *drv = struct mdev_driver *drv =
container_of(dev->driver, struct mdev_driver, driver); container_of(dev->driver, struct mdev_driver, driver);
struct mdev_device *mdev = to_mdev_device(dev);
int ret;
ret = mdev_attach_iommu(mdev);
if (ret)
return ret;
if (drv->probe) { if (!drv->probe)
ret = drv->probe(mdev); return 0;
if (ret) return drv->probe(to_mdev_device(dev));
mdev_detach_iommu(mdev);
}
return ret;
} }
static void mdev_remove(struct device *dev) static void mdev_remove(struct device *dev)
{ {
struct mdev_driver *drv = struct mdev_driver *drv =
container_of(dev->driver, struct mdev_driver, driver); container_of(dev->driver, struct mdev_driver, driver);
struct mdev_device *mdev = to_mdev_device(dev);
if (drv->remove) if (drv->remove)
drv->remove(mdev); drv->remove(to_mdev_device(dev));
mdev_detach_iommu(mdev);
} }
static int mdev_match(struct device *dev, struct device_driver *drv) static int mdev_match(struct device *dev, struct device_driver *drv)
......
...@@ -119,7 +119,7 @@ static int vfio_mdev_probe(struct mdev_device *mdev) ...@@ -119,7 +119,7 @@ static int vfio_mdev_probe(struct mdev_device *mdev)
return -ENOMEM; return -ENOMEM;
vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops); vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops);
ret = vfio_register_group_dev(vdev); ret = vfio_register_emulated_iommu_dev(vdev);
if (ret) if (ret)
goto out_uninit; goto out_uninit;
......
...@@ -1806,7 +1806,6 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device); ...@@ -1806,7 +1806,6 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device);
int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
{ {
struct pci_dev *pdev = vdev->pdev; struct pci_dev *pdev = vdev->pdev;
struct iommu_group *group;
int ret; int ret;
if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
...@@ -1825,10 +1824,6 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) ...@@ -1825,10 +1824,6 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
return -EBUSY; return -EBUSY;
} }
group = vfio_iommu_group_get(&pdev->dev);
if (!group)
return -EINVAL;
if (pci_is_root_bus(pdev->bus)) { if (pci_is_root_bus(pdev->bus)) {
ret = vfio_assign_device_set(&vdev->vdev, vdev); ret = vfio_assign_device_set(&vdev->vdev, vdev);
} else if (!pci_probe_reset_slot(pdev->slot)) { } else if (!pci_probe_reset_slot(pdev->slot)) {
...@@ -1842,10 +1837,10 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) ...@@ -1842,10 +1837,10 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
} }
if (ret) if (ret)
goto out_group_put; return ret;
ret = vfio_pci_vf_init(vdev); ret = vfio_pci_vf_init(vdev);
if (ret) if (ret)
goto out_group_put; return ret;
ret = vfio_pci_vga_init(vdev); ret = vfio_pci_vga_init(vdev);
if (ret) if (ret)
goto out_vf; goto out_vf;
...@@ -1876,8 +1871,6 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) ...@@ -1876,8 +1871,6 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
vfio_pci_set_power_state(vdev, PCI_D0); vfio_pci_set_power_state(vdev, PCI_D0);
out_vf: out_vf:
vfio_pci_vf_uninit(vdev); vfio_pci_vf_uninit(vdev);
out_group_put:
vfio_iommu_group_put(group, &pdev->dev);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(vfio_pci_core_register_device); EXPORT_SYMBOL_GPL(vfio_pci_core_register_device);
...@@ -1893,8 +1886,6 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) ...@@ -1893,8 +1886,6 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
vfio_pci_vf_uninit(vdev); vfio_pci_vf_uninit(vdev);
vfio_pci_vga_uninit(vdev); vfio_pci_vga_uninit(vdev);
vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
if (!disable_idle_d3) if (!disable_idle_d3)
vfio_pci_set_power_state(vdev, PCI_D0); vfio_pci_set_power_state(vdev, PCI_D0);
} }
......
...@@ -642,7 +642,6 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev, ...@@ -642,7 +642,6 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev,
int vfio_platform_probe_common(struct vfio_platform_device *vdev, int vfio_platform_probe_common(struct vfio_platform_device *vdev,
struct device *dev) struct device *dev)
{ {
struct iommu_group *group;
int ret; int ret;
vfio_init_group_dev(&vdev->vdev, dev, &vfio_platform_ops); vfio_init_group_dev(&vdev->vdev, dev, &vfio_platform_ops);
...@@ -663,24 +662,15 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, ...@@ -663,24 +662,15 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
goto out_uninit; goto out_uninit;
} }
group = vfio_iommu_group_get(dev);
if (!group) {
dev_err(dev, "No IOMMU group for device %s\n", vdev->name);
ret = -EINVAL;
goto put_reset;
}
ret = vfio_register_group_dev(&vdev->vdev); ret = vfio_register_group_dev(&vdev->vdev);
if (ret) if (ret)
goto put_iommu; goto put_reset;
mutex_init(&vdev->igate); mutex_init(&vdev->igate);
pm_runtime_enable(dev); pm_runtime_enable(dev);
return 0; return 0;
put_iommu:
vfio_iommu_group_put(group, dev);
put_reset: put_reset:
vfio_platform_put_reset(vdev); vfio_platform_put_reset(vdev);
out_uninit: out_uninit:
...@@ -696,7 +686,6 @@ void vfio_platform_remove_common(struct vfio_platform_device *vdev) ...@@ -696,7 +686,6 @@ void vfio_platform_remove_common(struct vfio_platform_device *vdev)
pm_runtime_disable(vdev->device); pm_runtime_disable(vdev->device);
vfio_platform_put_reset(vdev); vfio_platform_put_reset(vdev);
vfio_uninit_group_dev(&vdev->vdev); vfio_uninit_group_dev(&vdev->vdev);
vfio_iommu_group_put(vdev->vdev.dev->iommu_group, vdev->vdev.dev);
} }
EXPORT_SYMBOL_GPL(vfio_platform_remove_common); EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <linux/vfio.h> #include <linux/vfio.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include "vfio.h"
#define DRIVER_VERSION "0.3" #define DRIVER_VERSION "0.3"
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
...@@ -83,7 +84,7 @@ struct vfio_group { ...@@ -83,7 +84,7 @@ struct vfio_group {
struct mutex unbound_lock; struct mutex unbound_lock;
atomic_t opened; atomic_t opened;
wait_queue_head_t container_q; wait_queue_head_t container_q;
bool noiommu; enum vfio_group_type type;
unsigned int dev_counter; unsigned int dev_counter;
struct kvm *kvm; struct kvm *kvm;
struct blocking_notifier_head notifier; struct blocking_notifier_head notifier;
...@@ -169,70 +170,6 @@ static void vfio_release_device_set(struct vfio_device *device) ...@@ -169,70 +170,6 @@ static void vfio_release_device_set(struct vfio_device *device)
xa_unlock(&vfio_device_set_xa); xa_unlock(&vfio_device_set_xa);
} }
/*
 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
 * and remove functions. Any use case other than acquiring the first
 * reference for the purpose of calling vfio_register_group_dev(), or removing
 * that symmetric reference after vfio_unregister_group_dev(), should use the
 * raw iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
 * removes the device from the dummy group and cannot be nested.
 *
 * vfio_iommu_group_get() returns whatever iommu_group_get(dev) returns.
 * Only when CONFIG_VFIO_NOIOMMU is enabled, no group was found, the noiommu
 * flag is set and iommu_present(dev->bus) is false does it instead allocate
 * a group named "vfio-noiommu", add @dev to it and taint the kernel. If that
 * allocation or the add fails it returns NULL, not an ERR_PTR.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
struct iommu_group *group;
/* Only assigned in the CONFIG_VFIO_NOIOMMU section below. */
int __maybe_unused ret;
group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
/*
 * With noiommu enabled, an IOMMU group will be created for a device
 * that doesn't already have one and doesn't have an iommu_ops on their
 * bus. We set iommudata simply to be able to identify these groups
 * as special use and for reclamation later.
 */
/* Nothing to fake up: return the existing group, or NULL if there is none. */
if (group || !noiommu || iommu_present(dev->bus))
return group;
group = iommu_group_alloc();
/* Callers only test for NULL, so the ERR_PTR is collapsed to NULL here. */
if (IS_ERR(group))
return NULL;
iommu_group_set_name(group, "vfio-noiommu");
/* &noiommu is the marker vfio_iommu_group_put() compares against. */
iommu_group_set_iommudata(group, &noiommu, NULL);
ret = iommu_group_add_device(group, dev);
if (ret) {
iommu_group_put(group);
return NULL;
}
/*
 * Where to taint? At this point we've added an IOMMU group for a
 * device that is not backed by iommu_ops, therefore any iommu_
 * callback using iommu_ops can legitimately Oops. So, while we may
 * be about to give a DMA capable device to a user without IOMMU
 * protection, which is clearly taint-worthy, let's go ahead and do
 * it here.
 */
add_taint(TAINT_USER, LOCKDEP_STILL_OK);
dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif
return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
/*
 * Drop a group reference with iommu_group_put(). With CONFIG_VFIO_NOIOMMU
 * enabled, a group whose iommudata is &noiommu first has @dev removed from
 * it via iommu_group_remove_device().
 */
void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
/* iommudata == &noiommu identifies a group tagged as vfio-noiommu. */
if (iommu_group_get_iommudata(group) == &noiommu)
iommu_group_remove_device(dev);
#endif
iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
#ifdef CONFIG_VFIO_NOIOMMU #ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg) static void *vfio_noiommu_open(unsigned long arg)
{ {
...@@ -258,9 +195,9 @@ static long vfio_noiommu_ioctl(void *iommu_data, ...@@ -258,9 +195,9 @@ static long vfio_noiommu_ioctl(void *iommu_data,
} }
static int vfio_noiommu_attach_group(void *iommu_data, static int vfio_noiommu_attach_group(void *iommu_data,
struct iommu_group *iommu_group) struct iommu_group *iommu_group, enum vfio_group_type type)
{ {
return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL; return 0;
} }
static void vfio_noiommu_detach_group(void *iommu_data, static void vfio_noiommu_detach_group(void *iommu_data,
...@@ -277,8 +214,23 @@ static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { ...@@ -277,8 +214,23 @@ static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
.attach_group = vfio_noiommu_attach_group, .attach_group = vfio_noiommu_attach_group,
.detach_group = vfio_noiommu_detach_group, .detach_group = vfio_noiommu_detach_group,
}; };
#endif
/*
* Only noiommu containers can use vfio-noiommu and noiommu containers can only
* use vfio-noiommu.
*/
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
const struct vfio_iommu_driver *driver)
{
return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}
#else
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
const struct vfio_iommu_driver *driver)
{
return true;
}
#endif /* CONFIG_VFIO_NOIOMMU */
/** /**
* IOMMU driver registration * IOMMU driver registration
...@@ -384,7 +336,8 @@ static void vfio_group_unlock_and_free(struct vfio_group *group) ...@@ -384,7 +336,8 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
/** /**
* Group objects - create, release, get, put, search * Group objects - create, release, get, put, search
*/ */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
enum vfio_group_type type)
{ {
struct vfio_group *group, *tmp; struct vfio_group *group, *tmp;
struct device *dev; struct device *dev;
...@@ -403,9 +356,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) ...@@ -403,9 +356,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
atomic_set(&group->opened, 0); atomic_set(&group->opened, 0);
init_waitqueue_head(&group->container_q); init_waitqueue_head(&group->container_q);
group->iommu_group = iommu_group; group->iommu_group = iommu_group;
#ifdef CONFIG_VFIO_NOIOMMU group->type = type;
group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
#endif
BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
group->nb.notifier_call = vfio_iommu_group_notifier; group->nb.notifier_call = vfio_iommu_group_notifier;
...@@ -441,8 +392,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) ...@@ -441,8 +392,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
} }
dev = device_create(vfio.class, NULL, dev = device_create(vfio.class, NULL,
MKDEV(MAJOR(vfio.group_devt), minor), MKDEV(MAJOR(vfio.group_devt), minor), group, "%s%d",
group, "%s%d", group->noiommu ? "noiommu-" : "", group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
iommu_group_id(iommu_group)); iommu_group_id(iommu_group));
if (IS_ERR(dev)) { if (IS_ERR(dev)) {
vfio_free_group_minor(minor); vfio_free_group_minor(minor);
...@@ -828,43 +779,101 @@ void vfio_uninit_group_dev(struct vfio_device *device) ...@@ -828,43 +779,101 @@ void vfio_uninit_group_dev(struct vfio_device *device)
} }
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
int vfio_register_group_dev(struct vfio_device *device) static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
enum vfio_group_type type)
{ {
struct vfio_device *existing_device;
struct iommu_group *iommu_group; struct iommu_group *iommu_group;
struct vfio_group *group; struct vfio_group *group;
int ret;
iommu_group = iommu_group_alloc();
if (IS_ERR(iommu_group))
return ERR_CAST(iommu_group);
iommu_group_set_name(iommu_group, "vfio-noiommu");
ret = iommu_group_add_device(iommu_group, dev);
if (ret)
goto out_put_group;
group = vfio_create_group(iommu_group, type);
if (IS_ERR(group)) {
ret = PTR_ERR(group);
goto out_remove_device;
}
return group;
out_remove_device:
iommu_group_remove_device(dev);
out_put_group:
iommu_group_put(iommu_group);
return ERR_PTR(ret);
}
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
struct iommu_group *iommu_group;
struct vfio_group *group;
iommu_group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
if (!iommu_group && noiommu && !iommu_present(dev->bus)) {
/* /*
* If the driver doesn't specify a set then the device is added to a * With noiommu enabled, create an IOMMU group for devices that
* singleton set just for itself. * don't already have one and don't have an iommu_ops on their
* bus. Taint the kernel because we're about to give a DMA
* capable device to a user without IOMMU protection.
*/ */
if (!device->dev_set) group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
vfio_assign_device_set(device, device); if (!IS_ERR(group)) {
add_taint(TAINT_USER, LOCKDEP_STILL_OK);
iommu_group = iommu_group_get(device->dev); dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
}
return group;
}
#endif
if (!iommu_group) if (!iommu_group)
return -EINVAL; return ERR_PTR(-EINVAL);
/* a found vfio_group already holds a reference to the iommu_group */
group = vfio_group_get_from_iommu(iommu_group); group = vfio_group_get_from_iommu(iommu_group);
if (!group) { if (group)
group = vfio_create_group(iommu_group); goto out_put;
if (IS_ERR(group)) {
/* a newly created vfio_group keeps the reference. */
group = vfio_create_group(iommu_group, VFIO_IOMMU);
if (IS_ERR(group))
goto out_put;
return group;
out_put:
iommu_group_put(iommu_group); iommu_group_put(iommu_group);
return group;
}
static int __vfio_register_dev(struct vfio_device *device,
struct vfio_group *group)
{
struct vfio_device *existing_device;
if (IS_ERR(group))
return PTR_ERR(group); return PTR_ERR(group);
}
} else {
/* /*
* A found vfio_group already holds a reference to the * If the driver doesn't specify a set then the device is added to a
* iommu_group. A created vfio_group keeps the reference. * singleton set just for itself.
*/ */
iommu_group_put(iommu_group); if (!device->dev_set)
} vfio_assign_device_set(device, device);
existing_device = vfio_group_get_device(group, device->dev); existing_device = vfio_group_get_device(group, device->dev);
if (existing_device) { if (existing_device) {
dev_WARN(device->dev, "Device already exists on group %d\n", dev_WARN(device->dev, "Device already exists on group %d\n",
iommu_group_id(iommu_group)); iommu_group_id(group->iommu_group));
vfio_device_put(existing_device); vfio_device_put(existing_device);
if (group->type == VFIO_NO_IOMMU ||
group->type == VFIO_EMULATED_IOMMU)
iommu_group_remove_device(device->dev);
vfio_group_put(group); vfio_group_put(group);
return -EBUSY; return -EBUSY;
} }
...@@ -882,8 +891,25 @@ int vfio_register_group_dev(struct vfio_device *device) ...@@ -882,8 +891,25 @@ int vfio_register_group_dev(struct vfio_device *device)
return 0; return 0;
} }
int vfio_register_group_dev(struct vfio_device *device)
{
return __vfio_register_dev(device,
vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev); EXPORT_SYMBOL_GPL(vfio_register_group_dev);
/*
* Register a virtual device without IOMMU backing. The user of this
* device must not be able to directly trigger unmediated DMA.
*/
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
return __vfio_register_dev(device,
vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
/** /**
* Get a reference to the vfio_device for a device. Even if the * Get a reference to the vfio_device for a device. Even if the
* caller thinks they own the device, they could be racing with a * caller thinks they own the device, they could be racing with a
...@@ -1010,6 +1036,9 @@ void vfio_unregister_group_dev(struct vfio_device *device) ...@@ -1010,6 +1036,9 @@ void vfio_unregister_group_dev(struct vfio_device *device)
if (list_empty(&group->device_list)) if (list_empty(&group->device_list))
wait_event(group->container_q, !group->container); wait_event(group->container_q, !group->container);
if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
iommu_group_remove_device(device->dev);
/* Matches the get in vfio_register_group_dev() */ /* Matches the get in vfio_register_group_dev() */
vfio_group_put(group); vfio_group_put(group);
} }
...@@ -1042,13 +1071,10 @@ static long vfio_ioctl_check_extension(struct vfio_container *container, ...@@ -1042,13 +1071,10 @@ static long vfio_ioctl_check_extension(struct vfio_container *container,
list_for_each_entry(driver, &vfio.iommu_drivers_list, list_for_each_entry(driver, &vfio.iommu_drivers_list,
vfio_next) { vfio_next) {
#ifdef CONFIG_VFIO_NOIOMMU
if (!list_empty(&container->group_list) && if (!list_empty(&container->group_list) &&
(container->noiommu != !vfio_iommu_driver_allowed(container,
(driver->ops == &vfio_noiommu_ops))) driver))
continue; continue;
#endif
if (!try_module_get(driver->ops->owner)) if (!try_module_get(driver->ops->owner))
continue; continue;
...@@ -1079,7 +1105,8 @@ static int __vfio_container_attach_groups(struct vfio_container *container, ...@@ -1079,7 +1105,8 @@ static int __vfio_container_attach_groups(struct vfio_container *container,
int ret = -ENODEV; int ret = -ENODEV;
list_for_each_entry(group, &container->group_list, container_next) { list_for_each_entry(group, &container->group_list, container_next) {
ret = driver->ops->attach_group(data, group->iommu_group); ret = driver->ops->attach_group(data, group->iommu_group,
group->type);
if (ret) if (ret)
goto unwind; goto unwind;
} }
...@@ -1120,15 +1147,8 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container, ...@@ -1120,15 +1147,8 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
void *data; void *data;
#ifdef CONFIG_VFIO_NOIOMMU if (!vfio_iommu_driver_allowed(container, driver))
/*
* Only noiommu containers can use vfio-noiommu and noiommu
* containers can only use vfio-noiommu.
*/
if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
continue; continue;
#endif
if (!try_module_get(driver->ops->owner)) if (!try_module_get(driver->ops->owner))
continue; continue;
...@@ -1234,62 +1254,12 @@ static int vfio_fops_release(struct inode *inode, struct file *filep) ...@@ -1234,62 +1254,12 @@ static int vfio_fops_release(struct inode *inode, struct file *filep)
return 0; return 0;
} }
/*
 * After an iommu driver has been set on the container, read/write/mmap on
 * the container file are optionally forwarded to that driver, which lets a
 * backend offer management interfaces beyond ioctl.
 *
 * Returns the driver's read() result, or -EINVAL when no driver is set or
 * the driver has no read callback.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *drv = container->iommu_driver;

	if (likely(drv && drv->ops->read))
		return drv->ops->read(container->iommu_data,
				      buf, count, ppos);

	return -EINVAL;
}
/*
 * Forward a write on the container file to the container's iommu driver.
 *
 * Returns the driver's write() result, or -EINVAL when no driver is set or
 * the driver has no write callback.
 */
static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *drv = container->iommu_driver;

	if (likely(drv && drv->ops->write))
		return drv->ops->write(container->iommu_data,
				       buf, count, ppos);

	return -EINVAL;
}
/*
 * Forward an mmap of the container file to the container's iommu driver.
 *
 * Returns the driver's mmap() result, or -EINVAL when no driver is set or
 * the driver has no mmap callback.
 */
static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *drv = container->iommu_driver;

	if (likely(drv && drv->ops->mmap))
		return drv->ops->mmap(container->iommu_data, vma);

	return -EINVAL;
}
static const struct file_operations vfio_fops = { static const struct file_operations vfio_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.open = vfio_fops_open, .open = vfio_fops_open,
.release = vfio_fops_release, .release = vfio_fops_release,
.read = vfio_fops_read,
.write = vfio_fops_write,
.unlocked_ioctl = vfio_fops_unl_ioctl, .unlocked_ioctl = vfio_fops_unl_ioctl,
.compat_ioctl = compat_ptr_ioctl, .compat_ioctl = compat_ptr_ioctl,
.mmap = vfio_fops_mmap,
}; };
/** /**
...@@ -1366,7 +1336,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) ...@@ -1366,7 +1336,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
if (atomic_read(&group->container_users)) if (atomic_read(&group->container_users))
return -EINVAL; return -EINVAL;
if (group->noiommu && !capable(CAP_SYS_RAWIO)) if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
return -EPERM; return -EPERM;
f = fdget(container_fd); f = fdget(container_fd);
...@@ -1386,7 +1356,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) ...@@ -1386,7 +1356,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
/* Real groups and fake groups cannot mix */ /* Real groups and fake groups cannot mix */
if (!list_empty(&container->group_list) && if (!list_empty(&container->group_list) &&
container->noiommu != group->noiommu) { container->noiommu != (group->type == VFIO_NO_IOMMU)) {
ret = -EPERM; ret = -EPERM;
goto unlock_out; goto unlock_out;
} }
...@@ -1394,13 +1364,14 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) ...@@ -1394,13 +1364,14 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
driver = container->iommu_driver; driver = container->iommu_driver;
if (driver) { if (driver) {
ret = driver->ops->attach_group(container->iommu_data, ret = driver->ops->attach_group(container->iommu_data,
group->iommu_group); group->iommu_group,
group->type);
if (ret) if (ret)
goto unlock_out; goto unlock_out;
} }
group->container = container; group->container = container;
container->noiommu = group->noiommu; container->noiommu = (group->type == VFIO_NO_IOMMU);
list_add(&group->container_next, &container->group_list); list_add(&group->container_next, &container->group_list);
/* Get a reference on the container and mark a user within the group */ /* Get a reference on the container and mark a user within the group */
...@@ -1424,7 +1395,7 @@ static int vfio_group_add_container_user(struct vfio_group *group) ...@@ -1424,7 +1395,7 @@ static int vfio_group_add_container_user(struct vfio_group *group)
if (!atomic_inc_not_zero(&group->container_users)) if (!atomic_inc_not_zero(&group->container_users))
return -EINVAL; return -EINVAL;
if (group->noiommu) { if (group->type == VFIO_NO_IOMMU) {
atomic_dec(&group->container_users); atomic_dec(&group->container_users);
return -EPERM; return -EPERM;
} }
...@@ -1449,7 +1420,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) ...@@ -1449,7 +1420,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
!group->container->iommu_driver || !vfio_group_viable(group)) !group->container->iommu_driver || !vfio_group_viable(group))
return -EINVAL; return -EINVAL;
if (group->noiommu && !capable(CAP_SYS_RAWIO)) if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
return -EPERM; return -EPERM;
device = vfio_device_get_from_name(group, buf); device = vfio_device_get_from_name(group, buf);
...@@ -1496,7 +1467,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) ...@@ -1496,7 +1467,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
fd_install(fdno, filep); fd_install(fdno, filep);
if (group->noiommu) if (group->type == VFIO_NO_IOMMU)
dev_warn(device->dev, "vfio-noiommu device opened by user " dev_warn(device->dev, "vfio-noiommu device opened by user "
"(%s:%d)\n", current->comm, task_pid_nr(current)); "(%s:%d)\n", current->comm, task_pid_nr(current));
return fdno; return fdno;
...@@ -1592,7 +1563,7 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep) ...@@ -1592,7 +1563,7 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
if (!group) if (!group)
return -ENODEV; return -ENODEV;
if (group->noiommu && !capable(CAP_SYS_RAWIO)) { if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
vfio_group_put(group); vfio_group_put(group);
return -EPERM; return -EPERM;
} }
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
*/
/*
 * How the iommu_group behind a VFIO group is backed. The value is handed to
 * the IOMMU backend's attach_group() callback alongside the iommu_group.
 */
enum vfio_group_type {
/*
 * Physical device with IOMMU backing.
 */
VFIO_IOMMU,
/*
 * Virtual device without IOMMU backing. The VFIO core fakes up an
 * iommu_group as the iommu_group sysfs interface is part of the
 * userspace ABI. The user of these devices must not be able to
 * directly trigger unmediated DMA.
 */
VFIO_EMULATED_IOMMU,
/*
 * Physical device without IOMMU backing. The VFIO core fakes up an
 * iommu_group as the iommu_group sysfs interface is part of the
 * userspace ABI. Users can trigger unmediated DMA by the device,
 * usage is highly dangerous, requires an explicit opt-in and will
 * taint the kernel.
 */
VFIO_NO_IOMMU,
};
/*
 * Events for the backend driver notify callback, i.e. the @event argument
 * of vfio_iommu_driver_ops.notify().
 */
enum vfio_iommu_notify_type {
/* NOTE(review): presumably sent when the container is closed - confirm at the call site. */
VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
/**
 * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
 * @name: driver name string
 * @owner: module providing these callbacks; the VFIO core takes a reference
 *         on it with try_module_get() before trying the driver
 * @open: set up backend state for a container
 *        (NOTE(review): the returned pointer is presumably the iommu_data
 *        cookie passed to every other callback - confirm at the call site)
 * @release: tear down the state identified by @iommu_data
 * @ioctl: backend-specific ioctl handler
 * @attach_group: attach @group to the container state @iommu_data; @type
 *                says how the group is backed (see enum vfio_group_type).
 *                A non-zero return makes the caller abandon the attach.
 * @detach_group: detach @group from the container state @iommu_data
 * @notify: deliver an enum vfio_iommu_notify_type event to the backend
 *
 * NOTE(review): the contracts of @pin_pages, @unpin_pages,
 * @register_notifier, @unregister_notifier, @dma_rw and @group_iommu_domain
 * are not visible here beyond their signatures; check the implementing
 * backend before relying on them.
 */
struct vfio_iommu_driver_ops {
	char		*name;
	struct module	*owner;
	void		*(*open)(unsigned long arg);
	void		(*release)(void *iommu_data);
	long		(*ioctl)(void *iommu_data, unsigned int cmd,
				 unsigned long arg);
	/* The last parameter is named to match every other callback here. */
	int		(*attach_group)(void *iommu_data,
					struct iommu_group *group,
					enum vfio_group_type type);
	void		(*detach_group)(void *iommu_data,
					struct iommu_group *group);
	int		(*pin_pages)(void *iommu_data,
				     struct iommu_group *group,
				     unsigned long *user_pfn,
				     int npage, int prot,
				     unsigned long *phys_pfn);
	int		(*unpin_pages)(void *iommu_data,
				       unsigned long *user_pfn, int npage);
	int		(*register_notifier)(void *iommu_data,
					     unsigned long *events,
					     struct notifier_block *nb);
	int		(*unregister_notifier)(void *iommu_data,
					       struct notifier_block *nb);
	int		(*dma_rw)(void *iommu_data, dma_addr_t user_iova,
				  void *data, size_t count, bool write);
	struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
						   struct iommu_group *group);
	void		(*notify)(void *iommu_data,
				  enum vfio_iommu_notify_type event);
};
/*
 * Registration entry points for an IOMMU backend's callback table.
 * NOTE(review): return-value conventions are not visible from these
 * declarations - confirm against the definitions.
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/mm.h> #include <linux/mm.h>
#include "vfio.h"
#include <asm/iommu.h> #include <asm/iommu.h>
#include <asm/tce.h> #include <asm/tce.h>
...@@ -1238,13 +1239,16 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container, ...@@ -1238,13 +1239,16 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
} }
static int tce_iommu_attach_group(void *iommu_data, static int tce_iommu_attach_group(void *iommu_data,
struct iommu_group *iommu_group) struct iommu_group *iommu_group, enum vfio_group_type type)
{ {
int ret = 0; int ret = 0;
struct tce_container *container = iommu_data; struct tce_container *container = iommu_data;
struct iommu_table_group *table_group; struct iommu_table_group *table_group;
struct tce_iommu_group *tcegrp = NULL; struct tce_iommu_group *tcegrp = NULL;
if (type == VFIO_EMULATED_IOMMU)
return -EINVAL;
mutex_lock(&container->lock); mutex_lock(&container->lock);
/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
......
...@@ -36,10 +36,10 @@ ...@@ -36,10 +36,10 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/vfio.h> #include <linux/vfio.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/mdev.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/dma-iommu.h> #include <linux/dma-iommu.h>
#include <linux/irqdomain.h> #include <linux/irqdomain.h>
#include "vfio.h"
#define DRIVER_VERSION "0.2" #define DRIVER_VERSION "0.2"
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
...@@ -65,7 +65,6 @@ MODULE_PARM_DESC(dma_entry_limit, ...@@ -65,7 +65,6 @@ MODULE_PARM_DESC(dma_entry_limit,
struct vfio_iommu { struct vfio_iommu {
struct list_head domain_list; struct list_head domain_list;
struct list_head iova_list; struct list_head iova_list;
struct vfio_domain *external_domain; /* domain for external user */
struct mutex lock; struct mutex lock;
struct rb_root dma_list; struct rb_root dma_list;
struct blocking_notifier_head notifier; struct blocking_notifier_head notifier;
...@@ -78,6 +77,7 @@ struct vfio_iommu { ...@@ -78,6 +77,7 @@ struct vfio_iommu {
bool nesting; bool nesting;
bool dirty_page_tracking; bool dirty_page_tracking;
bool container_open; bool container_open;
struct list_head emulated_iommu_groups;
}; };
struct vfio_domain { struct vfio_domain {
...@@ -113,7 +113,6 @@ struct vfio_batch { ...@@ -113,7 +113,6 @@ struct vfio_batch {
struct vfio_iommu_group { struct vfio_iommu_group {
struct iommu_group *iommu_group; struct iommu_group *iommu_group;
struct list_head next; struct list_head next;
bool mdev_group; /* An mdev group */
bool pinned_page_dirty_scope; bool pinned_page_dirty_scope;
}; };
...@@ -140,9 +139,6 @@ struct vfio_regions { ...@@ -140,9 +139,6 @@ struct vfio_regions {
size_t len; size_t len;
}; };
#define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
(!list_empty(&iommu->domain_list))
#define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE) #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
/* /*
...@@ -880,7 +876,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, ...@@ -880,7 +876,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
* already pinned and accounted. Accounting should be done if there is no * already pinned and accounted. Accounting should be done if there is no
* iommu capable domain in the container. * iommu capable domain in the container.
*/ */
do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); do_accounting = list_empty(&iommu->domain_list);
for (i = 0; i < npage; i++) { for (i = 0; i < npage; i++) {
struct vfio_pfn *vpfn; struct vfio_pfn *vpfn;
...@@ -969,7 +965,7 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data, ...@@ -969,7 +965,7 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
mutex_lock(&iommu->lock); mutex_lock(&iommu->lock);
do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); do_accounting = list_empty(&iommu->domain_list);
for (i = 0; i < npage; i++) { for (i = 0; i < npage; i++) {
struct vfio_dma *dma; struct vfio_dma *dma;
dma_addr_t iova; dma_addr_t iova;
...@@ -1090,7 +1086,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, ...@@ -1090,7 +1086,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
if (!dma->size) if (!dma->size)
return 0; return 0;
if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) if (list_empty(&iommu->domain_list))
return 0; return 0;
/* /*
...@@ -1667,7 +1663,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, ...@@ -1667,7 +1663,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
vfio_link_dma(iommu, dma); vfio_link_dma(iommu, dma);
/* Don't pin and map if container doesn't contain IOMMU capable domain*/ /* Don't pin and map if container doesn't contain IOMMU capable domain*/
if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) if (list_empty(&iommu->domain_list))
dma->size = size; dma->size = size;
else else
ret = vfio_pin_map_dma(iommu, dma, size); ret = vfio_pin_map_dma(iommu, dma, size);
...@@ -1893,8 +1889,8 @@ static struct vfio_iommu_group* ...@@ -1893,8 +1889,8 @@ static struct vfio_iommu_group*
vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
struct iommu_group *iommu_group) struct iommu_group *iommu_group)
{ {
struct vfio_iommu_group *group;
struct vfio_domain *domain; struct vfio_domain *domain;
struct vfio_iommu_group *group = NULL;
list_for_each_entry(domain, &iommu->domain_list, next) { list_for_each_entry(domain, &iommu->domain_list, next) {
group = find_iommu_group(domain, iommu_group); group = find_iommu_group(domain, iommu_group);
...@@ -1902,10 +1898,10 @@ vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, ...@@ -1902,10 +1898,10 @@ vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
return group; return group;
} }
if (iommu->external_domain) list_for_each_entry(group, &iommu->emulated_iommu_groups, next)
group = find_iommu_group(iommu->external_domain, iommu_group); if (group->iommu_group == iommu_group)
return group; return group;
return NULL;
} }
static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
...@@ -1934,89 +1930,6 @@ static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, ...@@ -1934,89 +1930,6 @@ static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
return ret; return ret;
} }
static int vfio_mdev_attach_domain(struct device *dev, void *data)
{
struct mdev_device *mdev = to_mdev_device(dev);
struct iommu_domain *domain = data;
struct device *iommu_device;
iommu_device = mdev_get_iommu_device(mdev);
if (iommu_device) {
if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
return iommu_aux_attach_device(domain, iommu_device);
else
return iommu_attach_device(domain, iommu_device);
}
return -EINVAL;
}
static int vfio_mdev_detach_domain(struct device *dev, void *data)
{
struct mdev_device *mdev = to_mdev_device(dev);
struct iommu_domain *domain = data;
struct device *iommu_device;
iommu_device = mdev_get_iommu_device(mdev);
if (iommu_device) {
if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
iommu_aux_detach_device(domain, iommu_device);
else
iommu_detach_device(domain, iommu_device);
}
return 0;
}
static int vfio_iommu_attach_group(struct vfio_domain *domain,
struct vfio_iommu_group *group)
{
if (group->mdev_group)
return iommu_group_for_each_dev(group->iommu_group,
domain->domain,
vfio_mdev_attach_domain);
else
return iommu_attach_group(domain->domain, group->iommu_group);
}
static void vfio_iommu_detach_group(struct vfio_domain *domain,
struct vfio_iommu_group *group)
{
if (group->mdev_group)
iommu_group_for_each_dev(group->iommu_group, domain->domain,
vfio_mdev_detach_domain);
else
iommu_detach_group(domain->domain, group->iommu_group);
}
static bool vfio_bus_is_mdev(struct bus_type *bus)
{
struct bus_type *mdev_bus;
bool ret = false;
mdev_bus = symbol_get(mdev_bus_type);
if (mdev_bus) {
ret = (bus == mdev_bus);
symbol_put(mdev_bus_type);
}
return ret;
}
static int vfio_mdev_iommu_device(struct device *dev, void *data)
{
struct mdev_device *mdev = to_mdev_device(dev);
struct device **old = data, *new;
new = mdev_get_iommu_device(mdev);
if (!new || (*old && *old != new))
return -EINVAL;
*old = new;
return 0;
}
/* /*
* This is a helper function to insert an address range to iova list. * This is a helper function to insert an address range to iova list.
* The list is initially created with a single entry corresponding to * The list is initially created with a single entry corresponding to
...@@ -2241,81 +2154,58 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, ...@@ -2241,81 +2154,58 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
} }
static int vfio_iommu_type1_attach_group(void *iommu_data, static int vfio_iommu_type1_attach_group(void *iommu_data,
struct iommu_group *iommu_group) struct iommu_group *iommu_group, enum vfio_group_type type)
{ {
struct vfio_iommu *iommu = iommu_data; struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group; struct vfio_iommu_group *group;
struct vfio_domain *domain, *d; struct vfio_domain *domain, *d;
struct bus_type *bus = NULL; struct bus_type *bus = NULL;
int ret;
bool resv_msi, msi_remap; bool resv_msi, msi_remap;
phys_addr_t resv_msi_base = 0; phys_addr_t resv_msi_base = 0;
struct iommu_domain_geometry *geo; struct iommu_domain_geometry *geo;
LIST_HEAD(iova_copy); LIST_HEAD(iova_copy);
LIST_HEAD(group_resv_regions); LIST_HEAD(group_resv_regions);
int ret = -EINVAL;
mutex_lock(&iommu->lock); mutex_lock(&iommu->lock);
/* Check for duplicates */ /* Check for duplicates */
if (vfio_iommu_find_iommu_group(iommu, iommu_group)) { if (vfio_iommu_find_iommu_group(iommu, iommu_group))
mutex_unlock(&iommu->lock); goto out_unlock;
return -EINVAL;
}
group = kzalloc(sizeof(*group), GFP_KERNEL);
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!group || !domain) {
ret = -ENOMEM; ret = -ENOMEM;
goto out_free; group = kzalloc(sizeof(*group), GFP_KERNEL);
} if (!group)
goto out_unlock;
group->iommu_group = iommu_group; group->iommu_group = iommu_group;
/* Determine bus_type in order to allocate a domain */ if (type == VFIO_EMULATED_IOMMU) {
ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type); list_add(&group->next, &iommu->emulated_iommu_groups);
if (ret)
goto out_free;
if (vfio_bus_is_mdev(bus)) {
struct device *iommu_device = NULL;
group->mdev_group = true;
/* Determine the isolation type */
ret = iommu_group_for_each_dev(iommu_group, &iommu_device,
vfio_mdev_iommu_device);
if (ret || !iommu_device) {
if (!iommu->external_domain) {
INIT_LIST_HEAD(&domain->group_list);
iommu->external_domain = domain;
vfio_update_pgsize_bitmap(iommu);
} else {
kfree(domain);
}
list_add(&group->next,
&iommu->external_domain->group_list);
/* /*
* Non-iommu backed group cannot dirty memory directly, * An emulated IOMMU group cannot dirty memory directly, it can
* it can only use interfaces that provide dirty * only use interfaces that provide dirty tracking.
* tracking. * The iommu scope can only be promoted with the addition of a
* The iommu scope can only be promoted with the * dirty tracking group.
* addition of a dirty tracking group.
*/ */
group->pinned_page_dirty_scope = true; group->pinned_page_dirty_scope = true;
mutex_unlock(&iommu->lock); ret = 0;
goto out_unlock;
return 0;
} }
bus = iommu_device->bus; /* Determine bus_type in order to allocate a domain */
} ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
if (ret)
goto out_free_group;
ret = -ENOMEM;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
goto out_free_group;
domain->domain = iommu_domain_alloc(bus);
if (!domain->domain) {
ret = -EIO; ret = -EIO;
goto out_free; domain->domain = iommu_domain_alloc(bus);
} if (!domain->domain)
goto out_free_domain;
if (iommu->nesting) { if (iommu->nesting) {
ret = iommu_enable_nesting(domain->domain); ret = iommu_enable_nesting(domain->domain);
...@@ -2323,7 +2213,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -2323,7 +2213,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_domain; goto out_domain;
} }
ret = vfio_iommu_attach_group(domain, group); ret = iommu_attach_group(domain->domain, group->iommu_group);
if (ret) if (ret)
goto out_domain; goto out_domain;
...@@ -2390,15 +2280,17 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -2390,15 +2280,17 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
list_for_each_entry(d, &iommu->domain_list, next) { list_for_each_entry(d, &iommu->domain_list, next) {
if (d->domain->ops == domain->domain->ops && if (d->domain->ops == domain->domain->ops &&
d->prot == domain->prot) { d->prot == domain->prot) {
vfio_iommu_detach_group(domain, group); iommu_detach_group(domain->domain, group->iommu_group);
if (!vfio_iommu_attach_group(d, group)) { if (!iommu_attach_group(d->domain,
group->iommu_group)) {
list_add(&group->next, &d->group_list); list_add(&group->next, &d->group_list);
iommu_domain_free(domain->domain); iommu_domain_free(domain->domain);
kfree(domain); kfree(domain);
goto done; goto done;
} }
ret = vfio_iommu_attach_group(domain, group); ret = iommu_attach_group(domain->domain,
group->iommu_group);
if (ret) if (ret)
goto out_domain; goto out_domain;
} }
...@@ -2435,14 +2327,16 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -2435,14 +2327,16 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
return 0; return 0;
out_detach: out_detach:
vfio_iommu_detach_group(domain, group); iommu_detach_group(domain->domain, group->iommu_group);
out_domain: out_domain:
iommu_domain_free(domain->domain); iommu_domain_free(domain->domain);
vfio_iommu_iova_free(&iova_copy); vfio_iommu_iova_free(&iova_copy);
vfio_iommu_resv_free(&group_resv_regions); vfio_iommu_resv_free(&group_resv_regions);
out_free: out_free_domain:
kfree(domain); kfree(domain);
out_free_group:
kfree(group); kfree(group);
out_unlock:
mutex_unlock(&iommu->lock); mutex_unlock(&iommu->lock);
return ret; return ret;
} }
...@@ -2567,26 +2461,20 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, ...@@ -2567,26 +2461,20 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
LIST_HEAD(iova_copy); LIST_HEAD(iova_copy);
mutex_lock(&iommu->lock); mutex_lock(&iommu->lock);
list_for_each_entry(group, &iommu->emulated_iommu_groups, next) {
if (iommu->external_domain) { if (group->iommu_group != iommu_group)
group = find_iommu_group(iommu->external_domain, iommu_group); continue;
if (group) {
update_dirty_scope = !group->pinned_page_dirty_scope; update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(&group->next); list_del(&group->next);
kfree(group); kfree(group);
if (list_empty(&iommu->external_domain->group_list)) { if (list_empty(&iommu->emulated_iommu_groups) &&
if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) { list_empty(&iommu->domain_list)) {
WARN_ON(iommu->notifier.head); WARN_ON(iommu->notifier.head);
vfio_iommu_unmap_unpin_all(iommu); vfio_iommu_unmap_unpin_all(iommu);
} }
kfree(iommu->external_domain);
iommu->external_domain = NULL;
}
goto detach_group_done; goto detach_group_done;
} }
}
/* /*
* Get a copy of iova list. This will be used to update * Get a copy of iova list. This will be used to update
...@@ -2600,7 +2488,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, ...@@ -2600,7 +2488,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (!group) if (!group)
continue; continue;
vfio_iommu_detach_group(domain, group); iommu_detach_group(domain->domain, group->iommu_group);
update_dirty_scope = !group->pinned_page_dirty_scope; update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(&group->next); list_del(&group->next);
kfree(group); kfree(group);
...@@ -2613,7 +2501,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, ...@@ -2613,7 +2501,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
*/ */
if (list_empty(&domain->group_list)) { if (list_empty(&domain->group_list)) {
if (list_is_singular(&iommu->domain_list)) { if (list_is_singular(&iommu->domain_list)) {
if (!iommu->external_domain) { if (list_empty(&iommu->emulated_iommu_groups)) {
WARN_ON(iommu->notifier.head); WARN_ON(iommu->notifier.head);
vfio_iommu_unmap_unpin_all(iommu); vfio_iommu_unmap_unpin_all(iommu);
} else { } else {
...@@ -2677,23 +2565,23 @@ static void *vfio_iommu_type1_open(unsigned long arg) ...@@ -2677,23 +2565,23 @@ static void *vfio_iommu_type1_open(unsigned long arg)
mutex_init(&iommu->lock); mutex_init(&iommu->lock);
BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
init_waitqueue_head(&iommu->vaddr_wait); init_waitqueue_head(&iommu->vaddr_wait);
iommu->pgsize_bitmap = PAGE_MASK;
INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
return iommu; return iommu;
} }
static void vfio_release_domain(struct vfio_domain *domain, bool external) static void vfio_release_domain(struct vfio_domain *domain)
{ {
struct vfio_iommu_group *group, *group_tmp; struct vfio_iommu_group *group, *group_tmp;
list_for_each_entry_safe(group, group_tmp, list_for_each_entry_safe(group, group_tmp,
&domain->group_list, next) { &domain->group_list, next) {
if (!external) iommu_detach_group(domain->domain, group->iommu_group);
vfio_iommu_detach_group(domain, group);
list_del(&group->next); list_del(&group->next);
kfree(group); kfree(group);
} }
if (!external)
iommu_domain_free(domain->domain); iommu_domain_free(domain->domain);
} }
...@@ -2701,17 +2589,19 @@ static void vfio_iommu_type1_release(void *iommu_data) ...@@ -2701,17 +2589,19 @@ static void vfio_iommu_type1_release(void *iommu_data)
{ {
struct vfio_iommu *iommu = iommu_data; struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain, *domain_tmp; struct vfio_domain *domain, *domain_tmp;
struct vfio_iommu_group *group, *next_group;
if (iommu->external_domain) { list_for_each_entry_safe(group, next_group,
vfio_release_domain(iommu->external_domain, true); &iommu->emulated_iommu_groups, next) {
kfree(iommu->external_domain); list_del(&group->next);
kfree(group);
} }
vfio_iommu_unmap_unpin_all(iommu); vfio_iommu_unmap_unpin_all(iommu);
list_for_each_entry_safe(domain, domain_tmp, list_for_each_entry_safe(domain, domain_tmp,
&iommu->domain_list, next) { &iommu->domain_list, next) {
vfio_release_domain(domain, false); vfio_release_domain(domain);
list_del(&domain->next); list_del(&domain->next);
kfree(domain); kfree(domain);
} }
......
...@@ -18,7 +18,6 @@ struct mdev_device { ...@@ -18,7 +18,6 @@ struct mdev_device {
void *driver_data; void *driver_data;
struct list_head next; struct list_head next;
struct mdev_type *type; struct mdev_type *type;
struct device *iommu_device;
bool active; bool active;
}; };
...@@ -27,25 +26,6 @@ static inline struct mdev_device *to_mdev_device(struct device *dev) ...@@ -27,25 +26,6 @@ static inline struct mdev_device *to_mdev_device(struct device *dev)
return container_of(dev, struct mdev_device, dev); return container_of(dev, struct mdev_device, dev);
} }
/*
* Called by the parent device driver to set the device which represents
* this mdev in iommu protection scope. By default, the iommu device is
* NULL, that indicates using vendor defined isolation.
*
* @dev: the mediated device that iommu will isolate.
* @iommu_device: a pci device which represents the iommu for @dev.
*/
static inline void mdev_set_iommu_device(struct mdev_device *mdev,
struct device *iommu_device)
{
mdev->iommu_device = iommu_device;
}
static inline struct device *mdev_get_iommu_device(struct mdev_device *mdev)
{
return mdev->iommu_device;
}
unsigned int mdev_get_type_group_id(struct mdev_device *mdev); unsigned int mdev_get_type_group_id(struct mdev_device *mdev);
unsigned int mtype_get_type_group_id(struct mdev_type *mtype); unsigned int mtype_get_type_group_id(struct mdev_type *mtype);
struct device *mtype_get_parent_dev(struct mdev_type *mtype); struct device *mtype_get_parent_dev(struct mdev_type *mtype);
......
...@@ -71,68 +71,17 @@ struct vfio_device_ops { ...@@ -71,68 +71,17 @@ struct vfio_device_ops {
int (*match)(struct vfio_device *vdev, char *buf); int (*match)(struct vfio_device *vdev, char *buf);
}; };
extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
void vfio_init_group_dev(struct vfio_device *device, struct device *dev, void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
const struct vfio_device_ops *ops); const struct vfio_device_ops *ops);
void vfio_uninit_group_dev(struct vfio_device *device); void vfio_uninit_group_dev(struct vfio_device *device);
int vfio_register_group_dev(struct vfio_device *device); int vfio_register_group_dev(struct vfio_device *device);
int vfio_register_emulated_iommu_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device);
extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
extern void vfio_device_put(struct vfio_device *device); extern void vfio_device_put(struct vfio_device *device);
int vfio_assign_device_set(struct vfio_device *device, void *set_id); int vfio_assign_device_set(struct vfio_device *device, void *set_id);
/* events for the backend driver notify callback */
enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
/**
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
*/
struct vfio_iommu_driver_ops {
char *name;
struct module *owner;
void *(*open)(unsigned long arg);
void (*release)(void *iommu_data);
ssize_t (*read)(void *iommu_data, char __user *buf,
size_t count, loff_t *ppos);
ssize_t (*write)(void *iommu_data, const char __user *buf,
size_t count, loff_t *size);
long (*ioctl)(void *iommu_data, unsigned int cmd,
unsigned long arg);
int (*mmap)(void *iommu_data, struct vm_area_struct *vma);
int (*attach_group)(void *iommu_data,
struct iommu_group *group);
void (*detach_group)(void *iommu_data,
struct iommu_group *group);
int (*pin_pages)(void *iommu_data,
struct iommu_group *group,
unsigned long *user_pfn,
int npage, int prot,
unsigned long *phys_pfn);
int (*unpin_pages)(void *iommu_data,
unsigned long *user_pfn, int npage);
int (*register_notifier)(void *iommu_data,
unsigned long *events,
struct notifier_block *nb);
int (*unregister_notifier)(void *iommu_data,
struct notifier_block *nb);
int (*dma_rw)(void *iommu_data, dma_addr_t user_iova,
void *data, size_t count, bool write);
struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
struct iommu_group *group);
void (*notify)(void *iommu_data,
enum vfio_iommu_notify_type event);
};
extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
extern void vfio_unregister_iommu_driver(
const struct vfio_iommu_driver_ops *ops);
/* /*
* External user API * External user API
*/ */
......
...@@ -553,7 +553,7 @@ static int mbochs_probe(struct mdev_device *mdev) ...@@ -553,7 +553,7 @@ static int mbochs_probe(struct mdev_device *mdev)
mbochs_create_config_space(mdev_state); mbochs_create_config_space(mdev_state);
mbochs_reset(mdev_state); mbochs_reset(mdev_state);
ret = vfio_register_group_dev(&mdev_state->vdev); ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
if (ret) if (ret)
goto err_mem; goto err_mem;
dev_set_drvdata(&mdev->dev, mdev_state); dev_set_drvdata(&mdev->dev, mdev_state);
......
...@@ -258,7 +258,7 @@ static int mdpy_probe(struct mdev_device *mdev) ...@@ -258,7 +258,7 @@ static int mdpy_probe(struct mdev_device *mdev)
mdpy_count++; mdpy_count++;
ret = vfio_register_group_dev(&mdev_state->vdev); ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
if (ret) if (ret)
goto err_mem; goto err_mem;
dev_set_drvdata(&mdev->dev, mdev_state); dev_set_drvdata(&mdev->dev, mdev_state);
......
...@@ -741,7 +741,7 @@ static int mtty_probe(struct mdev_device *mdev) ...@@ -741,7 +741,7 @@ static int mtty_probe(struct mdev_device *mdev)
mtty_create_config_space(mdev_state); mtty_create_config_space(mdev_state);
ret = vfio_register_group_dev(&mdev_state->vdev); ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
if (ret) if (ret)
goto err_vconfig; goto err_vconfig;
dev_set_drvdata(&mdev->dev, mdev_state); dev_set_drvdata(&mdev->dev, mdev_state);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment