Commit 5a36f0f3 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'vfio-v5.8-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Block accesses to disabled MMIO space (Alex Williamson)

 - VFIO device migration API (Kirti Wankhede)

 - type1 IOMMU dirty bitmap API and implementation (Kirti Wankhede)

 - PCI NULL capability masking (Alex Williamson)

 - Memory leak fixes (Qian Cai)

 - Reference leak fix (Qiushi Wu)

* tag 'vfio-v5.8-rc1' of git://github.com/awilliam/linux-vfio:
  vfio iommu: typecast corrections
  vfio iommu: Use shift operation for 64-bit integer division
  vfio/mdev: Fix reference count leak in add_mdev_supported_type
  vfio: Selective dirty page tracking if IOMMU backed device pins pages
  vfio iommu: Add migration capability to report supported features
  vfio iommu: Update UNMAP_DMA ioctl to get dirty bitmap before unmap
  vfio iommu: Implementation of ioctl for dirty pages tracking
  vfio iommu: Add ioctl definition for dirty pages tracking
  vfio iommu: Cache pgsize_bitmap in struct vfio_iommu
  vfio iommu: Remove atomicity of ref_count of pinned pages
  vfio: UAPI for migration interface for device state
  vfio/pci: fix memory leaks of eventfd ctx
  vfio/pci: fix memory leaks in alloc_perm_bits()
  vfio-pci: Mask cap zero
  vfio-pci: Invalidate mmaps and block MMIO access on disabled memory
  vfio-pci: Fault mmaps to enable vma tracking
  vfio/type1: Support faulting PFNMAP vmas
parents ac7b3421 4f085ca2
......@@ -110,7 +110,7 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent,
"%s-%s", dev_driver_string(parent->dev),
group->name);
if (ret) {
kfree(type);
kobject_put(&type->kobj);
return ERR_PTR(ret);
}
......
This diff is collapsed.
......@@ -395,6 +395,14 @@ static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write)
*(__le32 *)(&p->write[off]) = cpu_to_le32(write);
}
/* Caller should hold memory_lock semaphore */
bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)
{
u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
return cmd & PCI_COMMAND_MEMORY;
}
/*
* Restore the *real* BARs after we detect a FLR or backdoor reset.
* (backdoor = some device specific technique that we didn't catch)
......@@ -556,13 +564,18 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
new_cmd = le32_to_cpu(val);
phys_io = !!(phys_cmd & PCI_COMMAND_IO);
virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO);
new_io = !!(new_cmd & PCI_COMMAND_IO);
phys_mem = !!(phys_cmd & PCI_COMMAND_MEMORY);
virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);
phys_io = !!(phys_cmd & PCI_COMMAND_IO);
virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO);
new_io = !!(new_cmd & PCI_COMMAND_IO);
if (!new_mem)
vfio_pci_zap_and_down_write_memory_lock(vdev);
else
down_write(&vdev->memory_lock);
/*
* If the user is writing mem/io enable (new_mem/io) and we
......@@ -579,8 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
}
count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
if (count < 0)
if (count < 0) {
if (offset == PCI_COMMAND)
up_write(&vdev->memory_lock);
return count;
}
/*
* Save current memory/io enable bits in vconfig to allow for
......@@ -591,6 +607,8 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
*virt_cmd &= cpu_to_le16(~mask);
*virt_cmd |= cpu_to_le16(new_cmd & mask);
up_write(&vdev->memory_lock);
}
/* Emulate INTx disable */
......@@ -828,8 +846,11 @@ static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
pos - offset + PCI_EXP_DEVCAP,
&cap);
if (!ret && (cap & PCI_EXP_DEVCAP_FLR))
if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
vfio_pci_zap_and_down_write_memory_lock(vdev);
pci_try_reset_function(vdev->pdev);
up_write(&vdev->memory_lock);
}
}
/*
......@@ -907,8 +928,11 @@ static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
pos - offset + PCI_AF_CAP,
&cap);
if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP))
if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
vfio_pci_zap_and_down_write_memory_lock(vdev);
pci_try_reset_function(vdev->pdev);
up_write(&vdev->memory_lock);
}
}
return count;
......@@ -1462,7 +1486,12 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
if (ret)
return ret;
if (cap <= PCI_CAP_ID_MAX) {
/*
* ID 0 is a NULL capability, conflicting with our fake
* PCI_CAP_ID_BASIC. As it has no content, consider it
* hidden for now.
*/
if (cap && cap <= PCI_CAP_ID_MAX) {
len = pci_cap_length[cap];
if (len == 0xFF) { /* Variable length */
len = vfio_cap_len(vdev, cap, pos);
......@@ -1728,8 +1757,11 @@ void vfio_config_free(struct vfio_pci_device *vdev)
vdev->vconfig = NULL;
kfree(vdev->pci_config_map);
vdev->pci_config_map = NULL;
kfree(vdev->msi_perm);
vdev->msi_perm = NULL;
if (vdev->msi_perm) {
free_perm_bits(vdev->msi_perm);
kfree(vdev->msi_perm);
vdev->msi_perm = NULL;
}
}
/*
......
......@@ -249,6 +249,7 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
struct pci_dev *pdev = vdev->pdev;
unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
int ret;
u16 cmd;
if (!is_irq_none(vdev))
return -EINVAL;
......@@ -258,13 +259,16 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
return -ENOMEM;
/* return the number of supported vectors if we can't get all: */
cmd = vfio_pci_memory_lock_and_enable(vdev);
ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
if (ret < nvec) {
if (ret > 0)
pci_free_irq_vectors(pdev);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
kfree(vdev->ctx);
return ret;
}
vfio_pci_memory_unlock_and_restore(vdev, cmd);
vdev->num_ctx = nvec;
vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
......@@ -287,6 +291,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
struct pci_dev *pdev = vdev->pdev;
struct eventfd_ctx *trigger;
int irq, ret;
u16 cmd;
if (vector < 0 || vector >= vdev->num_ctx)
return -EINVAL;
......@@ -295,7 +300,11 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
if (vdev->ctx[vector].trigger) {
irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
cmd = vfio_pci_memory_lock_and_enable(vdev);
free_irq(irq, vdev->ctx[vector].trigger);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(vdev->ctx[vector].trigger);
vdev->ctx[vector].trigger = NULL;
......@@ -323,6 +332,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
* such a reset it would be unsuccessful. To avoid this, restore the
* cached value of the message prior to enabling.
*/
cmd = vfio_pci_memory_lock_and_enable(vdev);
if (msix) {
struct msi_msg msg;
......@@ -332,6 +342,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
ret = request_irq(irq, vfio_msihandler, 0,
vdev->ctx[vector].name, trigger);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
if (ret) {
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(trigger);
......@@ -376,6 +387,7 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
int i;
u16 cmd;
for (i = 0; i < vdev->num_ctx; i++) {
vfio_virqfd_disable(&vdev->ctx[i].unmask);
......@@ -384,7 +396,9 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
cmd = vfio_pci_memory_lock_and_enable(vdev);
pci_free_irq_vectors(pdev);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
/*
* Both disable paths above use pci_intx_for_msi() to clear DisINTx
......
......@@ -92,6 +92,11 @@ struct vfio_pci_vf_token {
int users;
};
struct vfio_pci_mmap_vma {
struct vm_area_struct *vma;
struct list_head vma_next;
};
struct vfio_pci_device {
struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_NUM_BARS];
......@@ -132,6 +137,9 @@ struct vfio_pci_device {
struct list_head ioeventfds_list;
struct vfio_pci_vf_token *vf_token;
struct notifier_block nb;
struct mutex vma_lock;
struct list_head vma_list;
struct rw_semaphore memory_lock;
};
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
......@@ -174,6 +182,13 @@ extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
pci_power_t state);
extern bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev);
extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device
*vdev);
extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev);
extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev,
u16 cmd);
#ifdef CONFIG_VFIO_PCI_IGD
extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
#else
......
......@@ -162,6 +162,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
size_t x_start = 0, x_end = 0;
resource_size_t end;
void __iomem *io;
struct resource *res = &vdev->pdev->resource[bar];
ssize_t done;
if (pci_resource_start(pdev, bar))
......@@ -177,6 +178,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
count = min(count, (size_t)(end - pos));
if (res->flags & IORESOURCE_MEM) {
down_read(&vdev->memory_lock);
if (!__vfio_pci_memory_enabled(vdev)) {
up_read(&vdev->memory_lock);
return -EIO;
}
}
if (bar == PCI_ROM_RESOURCE) {
/*
* The ROM can fill less space than the BAR, so we start the
......@@ -184,13 +193,17 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
* filling large ROM BARs much faster.
*/
io = pci_map_rom(pdev, &x_start);
if (!io)
return -ENOMEM;
if (!io) {
done = -ENOMEM;
goto out;
}
x_end = end;
} else {
int ret = vfio_pci_setup_barmap(vdev, bar);
if (ret)
return ret;
if (ret) {
done = ret;
goto out;
}
io = vdev->barmap[bar];
}
......@@ -207,6 +220,9 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
if (bar == PCI_ROM_RESOURCE)
pci_unmap_rom(pdev, io);
out:
if (res->flags & IORESOURCE_MEM)
up_read(&vdev->memory_lock);
return done;
}
......
......@@ -85,6 +85,7 @@ struct vfio_group {
atomic_t opened;
wait_queue_head_t container_q;
bool noiommu;
unsigned int dev_counter;
struct kvm *kvm;
struct blocking_notifier_head notifier;
};
......@@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group,
mutex_lock(&group->device_lock);
list_add(&device->group_next, &group->device_list);
group->dev_counter++;
mutex_unlock(&group->device_lock);
return device;
......@@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref)
struct vfio_group *group = device->group;
list_del(&device->group_next);
group->dev_counter--;
mutex_unlock(&group->device_lock);
dev_set_drvdata(device->dev, NULL);
......@@ -1945,6 +1948,9 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
if (!group)
return -ENODEV;
if (group->dev_counter > 1)
return -EINVAL;
ret = vfio_group_add_container_user(group);
if (ret)
goto err_pin_pages;
......@@ -1952,7 +1958,8 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
ret = driver->ops->pin_pages(container->iommu_data,
group->iommu_group, user_pfn,
npage, prot, phys_pfn);
else
ret = -ENOTTY;
......@@ -2050,8 +2057,8 @@ int vfio_group_pin_pages(struct vfio_group *group,
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
ret = driver->ops->pin_pages(container->iommu_data,
user_iova_pfn, npage,
prot, phys_pfn);
group->iommu_group, user_iova_pfn,
npage, prot, phys_pfn);
else
ret = -ENOTTY;
......
This diff is collapsed.
......@@ -76,7 +76,9 @@ struct vfio_iommu_driver_ops {
struct iommu_group *group);
void (*detach_group)(void *iommu_data,
struct iommu_group *group);
int (*pin_pages)(void *iommu_data, unsigned long *user_pfn,
int (*pin_pages)(void *iommu_data,
struct iommu_group *group,
unsigned long *user_pfn,
int npage, int prot,
unsigned long *phys_pfn);
int (*unpin_pages)(void *iommu_data,
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment