Commit 467590e0 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'vfio-v4.18-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Bind type1 task tracking to group_leader to facilitate vCPU hotplug
   in QEMU (Alex Williamson)

 - Sample mdev display drivers, including region-based host and guest
   Linux drivers and bochs compatible dmabuf device
   (Gerd Hoffmann)

 - Fix vfio-platform reset module leak (Geert Uytterhoeven)

 - vfio-platform error message consistency (Geert Uytterhoeven)

 - Global checking for mdev uuid collisions rather than per parent
   device (Alex Williamson)

 - Use match_string() helper (Yisheng Xie)

 - vfio-platform PM domain fixes (Geert Uytterhoeven)

 - Fix sample mbochs driver build dependency (Arnd Bergmann)

* tag 'vfio-v4.18-rc1' of git://github.com/awilliam/linux-vfio:
  samples: mbochs: add DMA_SHARED_BUFFER dependency
  vfio: platform: Fix using devices in PM Domains
  vfio: use match_string() helper
  vfio/mdev: Re-order sysfs attribute creation
  vfio/mdev: Check globally for duplicate devices
  vfio: platform: Make printed error messages more consistent
  vfio: platform: Fix reset module leak in error path
  sample: vfio bochs vbe display (host device for bochs-drm)
  sample: vfio mdev display - guest driver
  sample: vfio mdev display - host device
  vfio/type1: Fix task tracking for QEMU vCPU hotplug
parents 763f9694 c1abca96
...@@ -145,6 +145,11 @@ The functions in the mdev_parent_ops structure are as follows: ...@@ -145,6 +145,11 @@ The functions in the mdev_parent_ops structure are as follows:
* create: allocate basic resources in a driver for a mediated device * create: allocate basic resources in a driver for a mediated device
* remove: free resources in a driver when a mediated device is destroyed * remove: free resources in a driver when a mediated device is destroyed
(Note that mdev-core provides no implicit serialization of create/remove
callbacks per mdev parent device, per mdev type, or any other categorization.
Vendor drivers are expected to be fully asynchronous in this respect or
provide their own internal resource protection.)
The callbacks in the mdev_parent_ops structure are as follows: The callbacks in the mdev_parent_ops structure are as follows:
* open: open callback of mediated device * open: open callback of mediated device
......
...@@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *mdev) ...@@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *mdev)
} }
EXPORT_SYMBOL(mdev_uuid); EXPORT_SYMBOL(mdev_uuid);
static int _find_mdev_device(struct device *dev, void *data)
{
struct mdev_device *mdev;
if (!dev_is_mdev(dev))
return 0;
mdev = to_mdev_device(dev);
if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
return 1;
return 0;
}
static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid)
{
struct device *dev;
dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
if (dev) {
put_device(dev);
return true;
}
return false;
}
/* Should be called holding parent_list_lock */ /* Should be called holding parent_list_lock */
static struct mdev_parent *__find_parent_device(struct device *dev) static struct mdev_parent *__find_parent_device(struct device *dev)
{ {
...@@ -221,7 +193,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) ...@@ -221,7 +193,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
} }
kref_init(&parent->ref); kref_init(&parent->ref);
mutex_init(&parent->lock);
parent->dev = dev; parent->dev = dev;
parent->ops = ops; parent->ops = ops;
...@@ -297,6 +268,10 @@ static void mdev_device_release(struct device *dev) ...@@ -297,6 +268,10 @@ static void mdev_device_release(struct device *dev)
{ {
struct mdev_device *mdev = to_mdev_device(dev); struct mdev_device *mdev = to_mdev_device(dev);
mutex_lock(&mdev_list_lock);
list_del(&mdev->next);
mutex_unlock(&mdev_list_lock);
dev_dbg(&mdev->dev, "MDEV: destroying\n"); dev_dbg(&mdev->dev, "MDEV: destroying\n");
kfree(mdev); kfree(mdev);
} }
...@@ -304,7 +279,7 @@ static void mdev_device_release(struct device *dev) ...@@ -304,7 +279,7 @@ static void mdev_device_release(struct device *dev)
int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
{ {
int ret; int ret;
struct mdev_device *mdev; struct mdev_device *mdev, *tmp;
struct mdev_parent *parent; struct mdev_parent *parent;
struct mdev_type *type = to_mdev_type(kobj); struct mdev_type *type = to_mdev_type(kobj);
...@@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) ...@@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
if (!parent) if (!parent)
return -EINVAL; return -EINVAL;
mutex_lock(&parent->lock); mutex_lock(&mdev_list_lock);
/* Check for duplicate */ /* Check for duplicate */
if (mdev_device_exist(parent, uuid)) { list_for_each_entry(tmp, &mdev_list, next) {
if (!uuid_le_cmp(tmp->uuid, uuid)) {
mutex_unlock(&mdev_list_lock);
ret = -EEXIST; ret = -EEXIST;
goto create_err; goto mdev_fail;
}
} }
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev) { if (!mdev) {
mutex_unlock(&mdev_list_lock);
ret = -ENOMEM; ret = -ENOMEM;
goto create_err; goto mdev_fail;
} }
memcpy(&mdev->uuid, &uuid, sizeof(uuid_le)); memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
mdev->parent = parent; mdev->parent = parent;
kref_init(&mdev->ref); kref_init(&mdev->ref);
...@@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) ...@@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
ret = device_register(&mdev->dev); ret = device_register(&mdev->dev);
if (ret) { if (ret) {
put_device(&mdev->dev); put_device(&mdev->dev);
goto create_err; goto mdev_fail;
} }
ret = mdev_device_create_ops(kobj, mdev); ret = mdev_device_create_ops(kobj, mdev);
if (ret) if (ret)
goto create_failed; goto create_fail;
ret = mdev_create_sysfs_files(&mdev->dev, type); ret = mdev_create_sysfs_files(&mdev->dev, type);
if (ret) { if (ret) {
mdev_device_remove_ops(mdev, true); mdev_device_remove_ops(mdev, true);
goto create_failed; goto create_fail;
} }
mdev->type_kobj = kobj; mdev->type_kobj = kobj;
mdev->active = true;
dev_dbg(&mdev->dev, "MDEV: created\n"); dev_dbg(&mdev->dev, "MDEV: created\n");
mutex_unlock(&parent->lock); return 0;
mutex_lock(&mdev_list_lock);
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
return ret;
create_failed: create_fail:
device_unregister(&mdev->dev); device_unregister(&mdev->dev);
mdev_fail:
create_err:
mutex_unlock(&parent->lock);
mdev_put_parent(parent); mdev_put_parent(parent);
return ret; return ret;
} }
...@@ -377,44 +352,39 @@ int mdev_device_remove(struct device *dev, bool force_remove) ...@@ -377,44 +352,39 @@ int mdev_device_remove(struct device *dev, bool force_remove)
struct mdev_parent *parent; struct mdev_parent *parent;
struct mdev_type *type; struct mdev_type *type;
int ret; int ret;
bool found = false;
mdev = to_mdev_device(dev); mdev = to_mdev_device(dev);
mutex_lock(&mdev_list_lock); mutex_lock(&mdev_list_lock);
list_for_each_entry(tmp, &mdev_list, next) { list_for_each_entry(tmp, &mdev_list, next) {
if (tmp == mdev) { if (tmp == mdev)
found = true;
break; break;
} }
}
if (found) if (tmp != mdev) {
list_del(&mdev->next); mutex_unlock(&mdev_list_lock);
return -ENODEV;
}
if (!mdev->active) {
mutex_unlock(&mdev_list_lock); mutex_unlock(&mdev_list_lock);
return -EAGAIN;
}
if (!found) mdev->active = false;
return -ENODEV; mutex_unlock(&mdev_list_lock);
type = to_mdev_type(mdev->type_kobj); type = to_mdev_type(mdev->type_kobj);
parent = mdev->parent; parent = mdev->parent;
mutex_lock(&parent->lock);
ret = mdev_device_remove_ops(mdev, force_remove); ret = mdev_device_remove_ops(mdev, force_remove);
if (ret) { if (ret) {
mutex_unlock(&parent->lock); mdev->active = true;
mutex_lock(&mdev_list_lock);
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
return ret; return ret;
} }
mdev_remove_sysfs_files(dev, type); mdev_remove_sysfs_files(dev, type);
device_unregister(dev); device_unregister(dev);
mutex_unlock(&parent->lock);
mdev_put_parent(parent); mdev_put_parent(parent);
return 0; return 0;
......
...@@ -20,7 +20,6 @@ struct mdev_parent { ...@@ -20,7 +20,6 @@ struct mdev_parent {
struct device *dev; struct device *dev;
const struct mdev_parent_ops *ops; const struct mdev_parent_ops *ops;
struct kref ref; struct kref ref;
struct mutex lock;
struct list_head next; struct list_head next;
struct kset *mdev_types_kset; struct kset *mdev_types_kset;
struct list_head type_list; struct list_head type_list;
...@@ -34,6 +33,7 @@ struct mdev_device { ...@@ -34,6 +33,7 @@ struct mdev_device {
struct kref ref; struct kref ref;
struct list_head next; struct list_head next;
struct kobject *type_kobj; struct kobject *type_kobj;
bool active;
}; };
#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) #define to_mdev_device(dev) container_of(dev, struct mdev_device, dev)
......
...@@ -257,24 +257,24 @@ int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type) ...@@ -257,24 +257,24 @@ int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
{ {
int ret; int ret;
ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
if (ret)
return ret;
ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev)); ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
if (ret) if (ret)
goto device_link_failed; return ret;
ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type"); ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
if (ret) if (ret)
goto type_link_failed; goto type_link_failed;
ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
if (ret)
goto create_files_failed;
return ret; return ret;
create_files_failed:
sysfs_remove_link(&dev->kobj, "mdev_type");
type_link_failed: type_link_failed:
sysfs_remove_link(type->devices_kobj, dev_name(dev)); sysfs_remove_link(type->devices_kobj, dev_name(dev));
device_link_failed:
sysfs_remove_files(&dev->kobj, mdev_device_attrs);
return ret; return ret;
} }
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/iommu.h> #include <linux/iommu.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -239,6 +240,7 @@ static void vfio_platform_release(void *device_data) ...@@ -239,6 +240,7 @@ static void vfio_platform_release(void *device_data)
ret, extra_dbg ? extra_dbg : ""); ret, extra_dbg ? extra_dbg : "");
WARN_ON(1); WARN_ON(1);
} }
pm_runtime_put(vdev->device);
vfio_platform_regions_cleanup(vdev); vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev); vfio_platform_irq_cleanup(vdev);
} }
...@@ -269,6 +271,10 @@ static int vfio_platform_open(void *device_data) ...@@ -269,6 +271,10 @@ static int vfio_platform_open(void *device_data)
if (ret) if (ret)
goto err_irq; goto err_irq;
ret = pm_runtime_get_sync(vdev->device);
if (ret < 0)
goto err_pm;
ret = vfio_platform_call_reset(vdev, &extra_dbg); ret = vfio_platform_call_reset(vdev, &extra_dbg);
if (ret && vdev->reset_required) { if (ret && vdev->reset_required) {
dev_warn(vdev->device, "reset driver is required and reset call failed in open (%d) %s\n", dev_warn(vdev->device, "reset driver is required and reset call failed in open (%d) %s\n",
...@@ -283,6 +289,8 @@ static int vfio_platform_open(void *device_data) ...@@ -283,6 +289,8 @@ static int vfio_platform_open(void *device_data)
return 0; return 0;
err_rst: err_rst:
pm_runtime_put(vdev->device);
err_pm:
vfio_platform_irq_cleanup(vdev); vfio_platform_irq_cleanup(vdev);
err_irq: err_irq:
vfio_platform_regions_cleanup(vdev); vfio_platform_regions_cleanup(vdev);
...@@ -630,8 +638,7 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev, ...@@ -630,8 +638,7 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev,
ret = device_property_read_string(dev, "compatible", ret = device_property_read_string(dev, "compatible",
&vdev->compat); &vdev->compat);
if (ret) if (ret)
pr_err("VFIO: cannot retrieve compat for %s\n", pr_err("VFIO: Cannot retrieve compat for %s\n", vdev->name);
vdev->name);
return ret; return ret;
} }
...@@ -673,7 +680,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, ...@@ -673,7 +680,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
ret = vfio_platform_get_reset(vdev); ret = vfio_platform_get_reset(vdev);
if (ret && vdev->reset_required) { if (ret && vdev->reset_required) {
pr_err("vfio: no reset function found for device %s\n", pr_err("VFIO: No reset function found for device %s\n",
vdev->name); vdev->name);
return ret; return ret;
} }
...@@ -681,18 +688,24 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, ...@@ -681,18 +688,24 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
group = vfio_iommu_group_get(dev); group = vfio_iommu_group_get(dev);
if (!group) { if (!group) {
pr_err("VFIO: No IOMMU group for device %s\n", vdev->name); pr_err("VFIO: No IOMMU group for device %s\n", vdev->name);
return -EINVAL; ret = -EINVAL;
goto put_reset;
} }
ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev); ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev);
if (ret) { if (ret)
vfio_iommu_group_put(group, dev); goto put_iommu;
return ret;
}
mutex_init(&vdev->igate); mutex_init(&vdev->igate);
pm_runtime_enable(vdev->device);
return 0; return 0;
put_iommu:
vfio_iommu_group_put(group, dev);
put_reset:
vfio_platform_put_reset(vdev);
return ret;
} }
EXPORT_SYMBOL_GPL(vfio_platform_probe_common); EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
...@@ -703,6 +716,7 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) ...@@ -703,6 +716,7 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev)
vdev = vfio_del_group_dev(dev); vdev = vfio_del_group_dev(dev);
if (vdev) { if (vdev) {
pm_runtime_disable(vdev->device);
vfio_platform_put_reset(vdev); vfio_platform_put_reset(vdev);
vfio_iommu_group_put(dev->iommu_group, dev); vfio_iommu_group_put(dev->iommu_group, dev);
} }
......
...@@ -630,8 +630,6 @@ static const char * const vfio_driver_whitelist[] = { "pci-stub" }; ...@@ -630,8 +630,6 @@ static const char * const vfio_driver_whitelist[] = { "pci-stub" };
static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{ {
int i;
if (dev_is_pci(dev)) { if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev); struct pci_dev *pdev = to_pci_dev(dev);
...@@ -639,12 +637,9 @@ static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) ...@@ -639,12 +637,9 @@ static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
return true; return true;
} }
for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) { return match_string(vfio_driver_whitelist,
if (!strcmp(drv->name, vfio_driver_whitelist[i])) ARRAY_SIZE(vfio_driver_whitelist),
return true; drv->name) >= 0;
}
return false;
} }
/* /*
......
...@@ -83,6 +83,7 @@ struct vfio_dma { ...@@ -83,6 +83,7 @@ struct vfio_dma {
size_t size; /* Map size (bytes) */ size_t size; /* Map size (bytes) */
int prot; /* IOMMU_READ/WRITE */ int prot; /* IOMMU_READ/WRITE */
bool iommu_mapped; bool iommu_mapped;
bool lock_cap; /* capable(CAP_IPC_LOCK) */
struct task_struct *task; struct task_struct *task;
struct rb_root pfn_list; /* Ex-user pinned pfn list */ struct rb_root pfn_list; /* Ex-user pinned pfn list */
}; };
...@@ -253,29 +254,25 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn) ...@@ -253,29 +254,25 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
return ret; return ret;
} }
static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap) static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
{ {
struct mm_struct *mm; struct mm_struct *mm;
bool is_current;
int ret; int ret;
if (!npage) if (!npage)
return 0; return 0;
is_current = (task->mm == current->mm); mm = async ? get_task_mm(dma->task) : dma->task->mm;
mm = is_current ? task->mm : get_task_mm(task);
if (!mm) if (!mm)
return -ESRCH; /* process exited */ return -ESRCH; /* process exited */
ret = down_write_killable(&mm->mmap_sem); ret = down_write_killable(&mm->mmap_sem);
if (!ret) { if (!ret) {
if (npage > 0) { if (npage > 0) {
if (lock_cap ? !*lock_cap : if (!dma->lock_cap) {
!has_capability(task, CAP_IPC_LOCK)) {
unsigned long limit; unsigned long limit;
limit = task_rlimit(task, limit = task_rlimit(dma->task,
RLIMIT_MEMLOCK) >> PAGE_SHIFT; RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (mm->locked_vm + npage > limit) if (mm->locked_vm + npage > limit)
...@@ -289,7 +286,7 @@ static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap) ...@@ -289,7 +286,7 @@ static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap)
up_write(&mm->mmap_sem); up_write(&mm->mmap_sem);
} }
if (!is_current) if (async)
mmput(mm); mmput(mm);
return ret; return ret;
...@@ -400,7 +397,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, ...@@ -400,7 +397,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
*/ */
static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
long npage, unsigned long *pfn_base, long npage, unsigned long *pfn_base,
bool lock_cap, unsigned long limit) unsigned long limit)
{ {
unsigned long pfn = 0; unsigned long pfn = 0;
long ret, pinned = 0, lock_acct = 0; long ret, pinned = 0, lock_acct = 0;
...@@ -423,7 +420,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, ...@@ -423,7 +420,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
* pages are already counted against the user. * pages are already counted against the user.
*/ */
if (!rsvd && !vfio_find_vpfn(dma, iova)) { if (!rsvd && !vfio_find_vpfn(dma, iova)) {
if (!lock_cap && current->mm->locked_vm + 1 > limit) { if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
put_pfn(*pfn_base, dma->prot); put_pfn(*pfn_base, dma->prot);
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
limit << PAGE_SHIFT); limit << PAGE_SHIFT);
...@@ -449,7 +446,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, ...@@ -449,7 +446,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
} }
if (!rsvd && !vfio_find_vpfn(dma, iova)) { if (!rsvd && !vfio_find_vpfn(dma, iova)) {
if (!lock_cap && if (!dma->lock_cap &&
current->mm->locked_vm + lock_acct + 1 > limit) { current->mm->locked_vm + lock_acct + 1 > limit) {
put_pfn(pfn, dma->prot); put_pfn(pfn, dma->prot);
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
...@@ -462,7 +459,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, ...@@ -462,7 +459,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
} }
out: out:
ret = vfio_lock_acct(current, lock_acct, &lock_cap); ret = vfio_lock_acct(dma, lock_acct, false);
unpin_out: unpin_out:
if (ret) { if (ret) {
...@@ -493,7 +490,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, ...@@ -493,7 +490,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
} }
if (do_accounting) if (do_accounting)
vfio_lock_acct(dma->task, locked - unlocked, NULL); vfio_lock_acct(dma, locked - unlocked, true);
return unlocked; return unlocked;
} }
...@@ -510,7 +507,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, ...@@ -510,7 +507,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base); ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
ret = vfio_lock_acct(dma->task, 1, NULL); ret = vfio_lock_acct(dma, 1, true);
if (ret) { if (ret) {
put_pfn(*pfn_base, dma->prot); put_pfn(*pfn_base, dma->prot);
if (ret == -ENOMEM) if (ret == -ENOMEM)
...@@ -537,7 +534,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, ...@@ -537,7 +534,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
unlocked = vfio_iova_put_vfio_pfn(dma, vpfn); unlocked = vfio_iova_put_vfio_pfn(dma, vpfn);
if (do_accounting) if (do_accounting)
vfio_lock_acct(dma->task, -unlocked, NULL); vfio_lock_acct(dma, -unlocked, true);
return unlocked; return unlocked;
} }
...@@ -829,7 +826,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, ...@@ -829,7 +826,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list); unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list);
if (do_accounting) { if (do_accounting) {
vfio_lock_acct(dma->task, -unlocked, NULL); vfio_lock_acct(dma, -unlocked, true);
return 0; return 0;
} }
return unlocked; return unlocked;
...@@ -1044,14 +1041,12 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, ...@@ -1044,14 +1041,12 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
size_t size = map_size; size_t size = map_size;
long npage; long npage;
unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
bool lock_cap = capable(CAP_IPC_LOCK);
int ret = 0; int ret = 0;
while (size) { while (size) {
/* Pin a contiguous chunk of memory */ /* Pin a contiguous chunk of memory */
npage = vfio_pin_pages_remote(dma, vaddr + dma->size, npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
size >> PAGE_SHIFT, &pfn, size >> PAGE_SHIFT, &pfn, limit);
lock_cap, limit);
if (npage <= 0) { if (npage <= 0) {
WARN_ON(!npage); WARN_ON(!npage);
ret = (int)npage; ret = (int)npage;
...@@ -1126,8 +1121,36 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, ...@@ -1126,8 +1121,36 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
dma->iova = iova; dma->iova = iova;
dma->vaddr = vaddr; dma->vaddr = vaddr;
dma->prot = prot; dma->prot = prot;
get_task_struct(current);
dma->task = current; /*
* We need to be able to both add to a task's locked memory and test
* against the locked memory limit and we need to be able to do both
* outside of this call path as pinning can be asynchronous via the
* external interfaces for mdev devices. RLIMIT_MEMLOCK requires a
* task_struct and VM locked pages requires an mm_struct, however
* holding an indefinite mm reference is not recommended, therefore we
* only hold a reference to a task. We could hold a reference to
* current, however QEMU uses this call path through vCPU threads,
* which can be killed resulting in a NULL mm and failure in the unmap
* path when called via a different thread. Avoid this problem by
* using the group_leader as threads within the same group require
* both CLONE_THREAD and CLONE_VM and will therefore use the same
* mm_struct.
*
* Previously we also used the task for testing CAP_IPC_LOCK at the
* time of pinning and accounting, however has_capability() makes use
* of real_cred, a copy-on-write field, so we can't guarantee that it
* matches group_leader, or in fact that it might not change by the
* time it's evaluated. If a process were to call MAP_DMA with
* CAP_IPC_LOCK but later drop it, it doesn't make sense that they
* possibly see different results for an iommu_mapped vfio_dma vs
* externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the
* time of calling MAP_DMA.
*/
get_task_struct(current->group_leader);
dma->task = current->group_leader;
dma->lock_cap = capable(CAP_IPC_LOCK);
dma->pfn_list = RB_ROOT; dma->pfn_list = RB_ROOT;
/* Insert zero-sized and grow as we map chunks of it */ /* Insert zero-sized and grow as we map chunks of it */
...@@ -1162,7 +1185,6 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, ...@@ -1162,7 +1185,6 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
struct vfio_domain *d; struct vfio_domain *d;
struct rb_node *n; struct rb_node *n;
unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
bool lock_cap = capable(CAP_IPC_LOCK);
int ret; int ret;
/* Arbitrarily pick the first domain in the list for lookups */ /* Arbitrarily pick the first domain in the list for lookups */
...@@ -1209,8 +1231,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, ...@@ -1209,8 +1231,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
npage = vfio_pin_pages_remote(dma, vaddr, npage = vfio_pin_pages_remote(dma, vaddr,
n >> PAGE_SHIFT, n >> PAGE_SHIFT,
&pfn, lock_cap, &pfn, limit);
limit);
if (npage <= 0) { if (npage <= 0) {
WARN_ON(!npage); WARN_ON(!npage);
ret = (int)npage; ret = (int)npage;
...@@ -1487,7 +1508,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu) ...@@ -1487,7 +1508,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
if (!is_invalid_reserved_pfn(vpfn->pfn)) if (!is_invalid_reserved_pfn(vpfn->pfn))
locked++; locked++;
} }
vfio_lock_acct(dma->task, locked - unlocked, NULL); vfio_lock_acct(dma, locked - unlocked, true);
} }
} }
......
...@@ -115,6 +115,37 @@ config SAMPLE_VFIO_MDEV_MTTY ...@@ -115,6 +115,37 @@ config SAMPLE_VFIO_MDEV_MTTY
Build a virtual tty sample driver for use as a VFIO Build a virtual tty sample driver for use as a VFIO
mediated device mediated device
config SAMPLE_VFIO_MDEV_MDPY
tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only"
depends on VFIO_MDEV_DEVICE && m
help
Build a virtual display sample driver for use as a VFIO
mediated device. It is a simple framebuffer and supports
the region display interface (VFIO_GFX_PLANE_TYPE_REGION).
config SAMPLE_VFIO_MDEV_MDPY_FB
tristate "Build VFIO mdpy example guest fbdev driver -- loadable module only"
depends on FB && m
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
help
Guest fbdev driver for the virtual display sample driver.
config SAMPLE_VFIO_MDEV_MBOCHS
tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only"
depends on VFIO_MDEV_DEVICE && m
select DMA_SHARED_BUFFER
help
Build a virtual display sample driver for use as a VFIO
mediated device. It supports the region display interface
(VFIO_GFX_PLANE_TYPE_DMABUF).
Emulate enough of qemu stdvga to make bochs-drm.ko happy.
That is basically the vram memory bar and the bochs dispi
interface vbe registers in the mmio register bar.
Specifically it does *not* include any legacy vga stuff.
Device looks a lot like "qemu -device secondary-vga".
config SAMPLE_STATX config SAMPLE_STATX
bool "Build example extended-stat using code" bool "Build example extended-stat using code"
depends on BROKEN depends on BROKEN
......
obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o
obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY) += mdpy.o
obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB) += mdpy-fb.o
obj-$(CONFIG_SAMPLE_VFIO_MDEV_MBOCHS) += mbochs.o
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 * Simple pci display device.
 *
 * Framebuffer memory is pci bar 0.
 * Configuration (read-only) is in pci config space.
 * Format field uses drm fourcc codes.
 * ATM only DRM_FORMAT_XRGB8888 is supported.
 */

/* Include guard: header is shared by the host (mdpy.c) and guest (mdpy-fb.c) drivers. */
#ifndef MDPY_DEFS_H
#define MDPY_DEFS_H

/* pci ids */
#define MDPY_PCI_VENDOR_ID	0x1b36 /* redhat */
#define MDPY_PCI_DEVICE_ID	0x000f
#define MDPY_PCI_SUBVENDOR_ID	PCI_SUBVENDOR_ID_REDHAT_QUMRANET
#define MDPY_PCI_SUBDEVICE_ID	PCI_SUBDEVICE_ID_QEMU

/* pci cfg space offsets for fb config (dword) */
#define MDPY_VENDORCAP_OFFSET	0x40
#define MDPY_VENDORCAP_SIZE	0x10
#define MDPY_FORMAT_OFFSET	(MDPY_VENDORCAP_OFFSET + 0x04)
#define MDPY_WIDTH_OFFSET	(MDPY_VENDORCAP_OFFSET + 0x08)
#define MDPY_HEIGHT_OFFSET	(MDPY_VENDORCAP_OFFSET + 0x0c)

#endif /* MDPY_DEFS_H */
// SPDX-License-Identifier: GPL-2.0
/*
* Framebuffer driver for mdpy (mediated virtual pci display device).
*
* See mdpy-defs.h for device specs
*
* (c) Gerd Hoffmann <kraxel@redhat.com>
*
* Using some code snippets from simplefb and cirrusfb.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/errno.h>
#include <linux/fb.h>
#include <linux/io.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <drm/drm_fourcc.h>
#include "mdpy-defs.h"
/* Fixed (hardware-invariant) fbdev parameters: packed-pixel truecolor,
 * no acceleration.  smem_start/smem_len/line_length are filled in at
 * probe time from the pci bar. */
static const struct fb_fix_screeninfo mdpy_fb_fix = {
	.id		= "mdpy-fb",
	.type		= FB_TYPE_PACKED_PIXELS,
	.visual		= FB_VISUAL_TRUECOLOR,
	.accel		= FB_ACCEL_NONE,
};
/* Variable fbdev parameters template.  The channel offsets/lengths below
 * describe 32 bpp XRGB8888 (alpha in bits 31:24, red 23:16, green 15:8,
 * blue 7:0), matching the only format the device supports (see
 * mdpy-defs.h).  xres/yres are filled in at probe time. */
static const struct fb_var_screeninfo mdpy_fb_var = {
	.height		= -1,	/* physical size unknown */
	.width		= -1,	/* physical size unknown */
	.activate	= FB_ACTIVATE_NOW,
	.vmode		= FB_VMODE_NONINTERLACED,

	.bits_per_pixel = 32,
	.transp.offset	= 24,
	.red.offset	= 16,
	.green.offset	= 8,
	.blue.offset	= 0,
	.transp.length	= 8,
	.red.length	= 8,
	.green.length	= 8,
	.blue.length	= 8,
};
/* Number of pseudo-palette entries the fbdev core expects for a
 * truecolor visual (used by fbcon for the 16 console colors). */
#define PSEUDO_PALETTE_SIZE 16

/* Per-device private data, allocated alongside the fb_info. */
struct mdpy_fb_par {
	u32 palette[PSEUDO_PALETTE_SIZE];	/* pseudo-palette storage */
};
/*
 * Store one pseudo-palette entry for the truecolor visual.
 *
 * The 16-bit color components are scaled down to the channel widths
 * advertised in info->var and packed at the advertised offsets.  When
 * the visual has an alpha channel it is forced to fully opaque.
 *
 * Returns 0 on success, -EINVAL if @regno is out of range.
 */
static int mdpy_fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
			     u_int transp, struct fb_info *info)
{
	u32 *palette = info->pseudo_palette;
	u32 entry;

	if (regno >= PSEUDO_PALETTE_SIZE)
		return -EINVAL;

	entry = ((red >> (16 - info->var.red.length))
		 << info->var.red.offset)
	      | ((green >> (16 - info->var.green.length))
		 << info->var.green.offset)
	      | ((blue >> (16 - info->var.blue.length))
		 << info->var.blue.offset);

	if (info->var.transp.length > 0) {
		u32 alpha = (1 << info->var.transp.length) - 1;

		entry |= alpha << info->var.transp.offset;
	}

	palette[regno] = entry;
	return 0;
}
/*
 * fb_destroy callback: invoked when the last reference to the fb_info
 * is dropped.  Undoes the ioremap() of the framebuffer bar done in
 * mdpy_fb_probe().
 */
static void mdpy_fb_destroy(struct fb_info *info)
{
	if (info->screen_base)
		iounmap(info->screen_base);
}
/* fbdev operations: plain cfb (memory-mapped framebuffer) drawing
 * helpers, custom palette handling and teardown. */
static struct fb_ops mdpy_fb_ops = {
	.owner		= THIS_MODULE,
	.fb_destroy	= mdpy_fb_destroy,
	.fb_setcolreg	= mdpy_fb_setcolreg,
	.fb_fillrect	= cfb_fillrect,
	.fb_copyarea	= cfb_copyarea,
	.fb_imageblit	= cfb_imageblit,
};
/*
 * Bind to an mdpy virtual display device: read the framebuffer
 * description from the vendor capability in pci config space, validate
 * it, ioremap bar 0 and register an fbdev.
 *
 * Returns 0 on success or a negative errno; on failure all acquired
 * resources are released via the goto-cleanup chain below.  (The
 * original code returned -EINVAL from the validation paths without
 * releasing the pci regions / disabling the device, and returned
 * ret == 0 when framebuffer_alloc() failed.)
 */
static int mdpy_fb_probe(struct pci_dev *pdev,
			 const struct pci_device_id *ent)
{
	struct fb_info *info;
	struct mdpy_fb_par *par;
	u32 format, width, height;
	int ret;

	ret = pci_enable_device(pdev);
	if (ret < 0)
		return ret;

	ret = pci_request_regions(pdev, "mdpy-fb");
	if (ret < 0)
		goto err_disable_dev;

	/* The device describes its scanout buffer in config space. */
	pci_read_config_dword(pdev, MDPY_FORMAT_OFFSET, &format);
	pci_read_config_dword(pdev, MDPY_WIDTH_OFFSET, &width);
	pci_read_config_dword(pdev, MDPY_HEIGHT_OFFSET, &height);

	if (format != DRM_FORMAT_XRGB8888) {
		pci_err(pdev, "format mismatch (0x%x != 0x%x)\n",
			format, DRM_FORMAT_XRGB8888);
		ret = -EINVAL;
		goto err_release_regions;
	}
	if (width < 100 || width > 10000) {
		pci_err(pdev, "width (%d) out of range\n", width);
		ret = -EINVAL;
		goto err_release_regions;
	}
	if (height < 100 || height > 10000) {
		pci_err(pdev, "height (%d) out of range\n", height);
		ret = -EINVAL;
		goto err_release_regions;
	}
	pci_info(pdev, "mdpy found: %dx%d framebuffer\n",
		 width, height);

	info = framebuffer_alloc(sizeof(struct mdpy_fb_par), &pdev->dev);
	if (!info) {
		ret = -ENOMEM;
		goto err_release_regions;
	}
	pci_set_drvdata(pdev, info);
	par = info->par;

	info->fix = mdpy_fb_fix;
	info->fix.smem_start = pci_resource_start(pdev, 0);
	info->fix.smem_len = pci_resource_len(pdev, 0);
	info->fix.line_length = width * 4;

	info->var = mdpy_fb_var;
	info->var.xres = width;
	info->var.yres = height;
	info->var.xres_virtual = width;
	info->var.yres_virtual = height;

	info->screen_size = info->fix.smem_len;
	info->screen_base = ioremap(info->fix.smem_start,
				    info->screen_size);
	if (!info->screen_base) {
		pci_err(pdev, "ioremap(pcibar) failed\n");
		ret = -EIO;
		goto err_release_fb;
	}

	/* Advertise the framebuffer range so conflicting drivers can be kicked. */
	info->apertures = alloc_apertures(1);
	if (!info->apertures) {
		ret = -ENOMEM;
		goto err_unmap;
	}
	info->apertures->ranges[0].base = info->fix.smem_start;
	info->apertures->ranges[0].size = info->fix.smem_len;

	info->fbops = &mdpy_fb_ops;
	info->flags = FBINFO_DEFAULT;
	info->pseudo_palette = par->palette;

	ret = register_framebuffer(info);
	if (ret < 0) {
		pci_err(pdev, "mdpy-fb device register failed: %d\n", ret);
		goto err_unmap;
	}

	pci_info(pdev, "fb%d registered\n", info->node);
	return 0;

err_unmap:
	iounmap(info->screen_base);

err_release_fb:
	framebuffer_release(info);

err_release_regions:
	pci_release_regions(pdev);

err_disable_dev:
	pci_disable_device(pdev);

	return ret;
}
/*
 * Unbind: tear down the fbdev and release the pci resources acquired
 * in mdpy_fb_probe().  unregister_framebuffer() drops the last fb_info
 * reference, which runs mdpy_fb_destroy() and unmaps the bar before we
 * release the regions.  (The original code never released the regions
 * nor disabled the device, so a subsequent re-bind of the driver would
 * fail in pci_request_regions().)
 */
static void mdpy_fb_remove(struct pci_dev *pdev)
{
	struct fb_info *info = pci_get_drvdata(pdev);

	unregister_framebuffer(info);
	framebuffer_release(info);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}
/* Match only the mdpy virtual display device (QEMU/redhat id space,
 * see mdpy-defs.h). */
static struct pci_device_id mdpy_fb_pci_table[] = {
	{
		.vendor	   = MDPY_PCI_VENDOR_ID,
		.device	   = MDPY_PCI_DEVICE_ID,
		.subvendor = MDPY_PCI_SUBVENDOR_ID,
		.subdevice = MDPY_PCI_SUBDEVICE_ID,
	}, {
		/* end of list */
	}
};
/* PCI driver glue for the mdpy guest fbdev driver. */
static struct pci_driver mdpy_fb_pci_driver = {
	.name		= "mdpy-fb",
	.id_table	= mdpy_fb_pci_table,
	.probe		= mdpy_fb_probe,
	.remove		= mdpy_fb_remove,
};
static int __init mdpy_fb_init(void)
{
int ret;
ret = pci_register_driver(&mdpy_fb_pci_driver);
if (ret)
return ret;
return 0;
}
module_init(mdpy_fb_init);
MODULE_DEVICE_TABLE(pci, mdpy_fb_pci_table);
MODULE_LICENSE("GPL v2");
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment