Commit cd34b82e authored by Alex Williamson

Merge branches 'v5.8/vfio/alex-block-mmio-v3', 'v5.8/vfio/alex-zero-cap-v2'...

Merge branches 'v5.8/vfio/alex-block-mmio-v3', 'v5.8/vfio/alex-zero-cap-v2' and 'v5.8/vfio/qian-leak-fixes' into v5.8/vfio/next
This diff is collapsed.
...@@ -395,6 +395,14 @@ static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write) ...@@ -395,6 +395,14 @@ static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write)
*(__le32 *)(&p->write[off]) = cpu_to_le32(write); *(__le32 *)(&p->write[off]) = cpu_to_le32(write);
} }
/* Caller should hold memory_lock semaphore */
bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)
{
u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
return cmd & PCI_COMMAND_MEMORY;
}
/* /*
* Restore the *real* BARs after we detect a FLR or backdoor reset. * Restore the *real* BARs after we detect a FLR or backdoor reset.
* (backdoor = some device specific technique that we didn't catch) * (backdoor = some device specific technique that we didn't catch)
...@@ -556,13 +564,18 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, ...@@ -556,13 +564,18 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
new_cmd = le32_to_cpu(val); new_cmd = le32_to_cpu(val);
phys_io = !!(phys_cmd & PCI_COMMAND_IO);
virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO);
new_io = !!(new_cmd & PCI_COMMAND_IO);
phys_mem = !!(phys_cmd & PCI_COMMAND_MEMORY); phys_mem = !!(phys_cmd & PCI_COMMAND_MEMORY);
virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY); virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
new_mem = !!(new_cmd & PCI_COMMAND_MEMORY); new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);
phys_io = !!(phys_cmd & PCI_COMMAND_IO); if (!new_mem)
virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO); vfio_pci_zap_and_down_write_memory_lock(vdev);
new_io = !!(new_cmd & PCI_COMMAND_IO); else
down_write(&vdev->memory_lock);
/* /*
* If the user is writing mem/io enable (new_mem/io) and we * If the user is writing mem/io enable (new_mem/io) and we
...@@ -579,8 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, ...@@ -579,8 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
} }
count = vfio_default_config_write(vdev, pos, count, perm, offset, val); count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
if (count < 0) if (count < 0) {
if (offset == PCI_COMMAND)
up_write(&vdev->memory_lock);
return count; return count;
}
/* /*
* Save current memory/io enable bits in vconfig to allow for * Save current memory/io enable bits in vconfig to allow for
...@@ -591,6 +607,8 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, ...@@ -591,6 +607,8 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
*virt_cmd &= cpu_to_le16(~mask); *virt_cmd &= cpu_to_le16(~mask);
*virt_cmd |= cpu_to_le16(new_cmd & mask); *virt_cmd |= cpu_to_le16(new_cmd & mask);
up_write(&vdev->memory_lock);
} }
/* Emulate INTx disable */ /* Emulate INTx disable */
...@@ -828,8 +846,11 @@ static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos, ...@@ -828,8 +846,11 @@ static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
pos - offset + PCI_EXP_DEVCAP, pos - offset + PCI_EXP_DEVCAP,
&cap); &cap);
if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
vfio_pci_zap_and_down_write_memory_lock(vdev);
pci_try_reset_function(vdev->pdev); pci_try_reset_function(vdev->pdev);
up_write(&vdev->memory_lock);
}
} }
/* /*
...@@ -907,8 +928,11 @@ static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos, ...@@ -907,8 +928,11 @@ static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
pos - offset + PCI_AF_CAP, pos - offset + PCI_AF_CAP,
&cap); &cap);
if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
vfio_pci_zap_and_down_write_memory_lock(vdev);
pci_try_reset_function(vdev->pdev); pci_try_reset_function(vdev->pdev);
up_write(&vdev->memory_lock);
}
} }
return count; return count;
...@@ -1462,7 +1486,12 @@ static int vfio_cap_init(struct vfio_pci_device *vdev) ...@@ -1462,7 +1486,12 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
if (ret) if (ret)
return ret; return ret;
if (cap <= PCI_CAP_ID_MAX) { /*
* ID 0 is a NULL capability, conflicting with our fake
* PCI_CAP_ID_BASIC. As it has no content, consider it
* hidden for now.
*/
if (cap && cap <= PCI_CAP_ID_MAX) {
len = pci_cap_length[cap]; len = pci_cap_length[cap];
if (len == 0xFF) { /* Variable length */ if (len == 0xFF) { /* Variable length */
len = vfio_cap_len(vdev, cap, pos); len = vfio_cap_len(vdev, cap, pos);
...@@ -1728,8 +1757,11 @@ void vfio_config_free(struct vfio_pci_device *vdev) ...@@ -1728,8 +1757,11 @@ void vfio_config_free(struct vfio_pci_device *vdev)
vdev->vconfig = NULL; vdev->vconfig = NULL;
kfree(vdev->pci_config_map); kfree(vdev->pci_config_map);
vdev->pci_config_map = NULL; vdev->pci_config_map = NULL;
kfree(vdev->msi_perm); if (vdev->msi_perm) {
vdev->msi_perm = NULL; free_perm_bits(vdev->msi_perm);
kfree(vdev->msi_perm);
vdev->msi_perm = NULL;
}
} }
/* /*
......
...@@ -249,6 +249,7 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) ...@@ -249,6 +249,7 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
struct pci_dev *pdev = vdev->pdev; struct pci_dev *pdev = vdev->pdev;
unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI; unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
int ret; int ret;
u16 cmd;
if (!is_irq_none(vdev)) if (!is_irq_none(vdev))
return -EINVAL; return -EINVAL;
...@@ -258,13 +259,16 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) ...@@ -258,13 +259,16 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
return -ENOMEM; return -ENOMEM;
/* return the number of supported vectors if we can't get all: */ /* return the number of supported vectors if we can't get all: */
cmd = vfio_pci_memory_lock_and_enable(vdev);
ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag); ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
if (ret < nvec) { if (ret < nvec) {
if (ret > 0) if (ret > 0)
pci_free_irq_vectors(pdev); pci_free_irq_vectors(pdev);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
kfree(vdev->ctx); kfree(vdev->ctx);
return ret; return ret;
} }
vfio_pci_memory_unlock_and_restore(vdev, cmd);
vdev->num_ctx = nvec; vdev->num_ctx = nvec;
vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX : vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
...@@ -287,6 +291,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, ...@@ -287,6 +291,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
struct pci_dev *pdev = vdev->pdev; struct pci_dev *pdev = vdev->pdev;
struct eventfd_ctx *trigger; struct eventfd_ctx *trigger;
int irq, ret; int irq, ret;
u16 cmd;
if (vector < 0 || vector >= vdev->num_ctx) if (vector < 0 || vector >= vdev->num_ctx)
return -EINVAL; return -EINVAL;
...@@ -295,7 +300,11 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, ...@@ -295,7 +300,11 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
if (vdev->ctx[vector].trigger) { if (vdev->ctx[vector].trigger) {
irq_bypass_unregister_producer(&vdev->ctx[vector].producer); irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
cmd = vfio_pci_memory_lock_and_enable(vdev);
free_irq(irq, vdev->ctx[vector].trigger); free_irq(irq, vdev->ctx[vector].trigger);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
kfree(vdev->ctx[vector].name); kfree(vdev->ctx[vector].name);
eventfd_ctx_put(vdev->ctx[vector].trigger); eventfd_ctx_put(vdev->ctx[vector].trigger);
vdev->ctx[vector].trigger = NULL; vdev->ctx[vector].trigger = NULL;
...@@ -323,6 +332,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, ...@@ -323,6 +332,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
* such a reset it would be unsuccessful. To avoid this, restore the * such a reset it would be unsuccessful. To avoid this, restore the
* cached value of the message prior to enabling. * cached value of the message prior to enabling.
*/ */
cmd = vfio_pci_memory_lock_and_enable(vdev);
if (msix) { if (msix) {
struct msi_msg msg; struct msi_msg msg;
...@@ -332,6 +342,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, ...@@ -332,6 +342,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
ret = request_irq(irq, vfio_msihandler, 0, ret = request_irq(irq, vfio_msihandler, 0,
vdev->ctx[vector].name, trigger); vdev->ctx[vector].name, trigger);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
if (ret) { if (ret) {
kfree(vdev->ctx[vector].name); kfree(vdev->ctx[vector].name);
eventfd_ctx_put(trigger); eventfd_ctx_put(trigger);
...@@ -376,6 +387,7 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) ...@@ -376,6 +387,7 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
{ {
struct pci_dev *pdev = vdev->pdev; struct pci_dev *pdev = vdev->pdev;
int i; int i;
u16 cmd;
for (i = 0; i < vdev->num_ctx; i++) { for (i = 0; i < vdev->num_ctx; i++) {
vfio_virqfd_disable(&vdev->ctx[i].unmask); vfio_virqfd_disable(&vdev->ctx[i].unmask);
...@@ -384,7 +396,9 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) ...@@ -384,7 +396,9 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
cmd = vfio_pci_memory_lock_and_enable(vdev);
pci_free_irq_vectors(pdev); pci_free_irq_vectors(pdev);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
/* /*
* Both disable paths above use pci_intx_for_msi() to clear DisINTx * Both disable paths above use pci_intx_for_msi() to clear DisINTx
......
...@@ -92,6 +92,11 @@ struct vfio_pci_vf_token { ...@@ -92,6 +92,11 @@ struct vfio_pci_vf_token {
int users; int users;
}; };
/*
 * Tracking node pairing one vm_area_struct with a list linkage.
 * NOTE(review): presumably linked onto vfio_pci_device.vma_list and
 * protected by vfio_pci_device.vma_lock - confirm against the users.
 */
struct vfio_pci_mmap_vma {
struct vm_area_struct *vma;	/* the tracked VMA */
struct list_head vma_next;	/* list linkage for this entry */
};
struct vfio_pci_device { struct vfio_pci_device {
struct pci_dev *pdev; struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_NUM_BARS]; void __iomem *barmap[PCI_STD_NUM_BARS];
...@@ -132,6 +137,9 @@ struct vfio_pci_device { ...@@ -132,6 +137,9 @@ struct vfio_pci_device {
struct list_head ioeventfds_list; struct list_head ioeventfds_list;
struct vfio_pci_vf_token *vf_token; struct vfio_pci_vf_token *vf_token;
struct notifier_block nb; struct notifier_block nb;
struct mutex vma_lock;
struct list_head vma_list;
struct rw_semaphore memory_lock;
}; };
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
...@@ -174,6 +182,13 @@ extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, ...@@ -174,6 +182,13 @@ extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev, extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
pci_power_t state); pci_power_t state);
extern bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev);
extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device
*vdev);
extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev);
extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev,
u16 cmd);
#ifdef CONFIG_VFIO_PCI_IGD #ifdef CONFIG_VFIO_PCI_IGD
extern int vfio_pci_igd_init(struct vfio_pci_device *vdev); extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
#else #else
......
...@@ -162,6 +162,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, ...@@ -162,6 +162,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
size_t x_start = 0, x_end = 0; size_t x_start = 0, x_end = 0;
resource_size_t end; resource_size_t end;
void __iomem *io; void __iomem *io;
struct resource *res = &vdev->pdev->resource[bar];
ssize_t done; ssize_t done;
if (pci_resource_start(pdev, bar)) if (pci_resource_start(pdev, bar))
...@@ -177,6 +178,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, ...@@ -177,6 +178,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
count = min(count, (size_t)(end - pos)); count = min(count, (size_t)(end - pos));
if (res->flags & IORESOURCE_MEM) {
down_read(&vdev->memory_lock);
if (!__vfio_pci_memory_enabled(vdev)) {
up_read(&vdev->memory_lock);
return -EIO;
}
}
if (bar == PCI_ROM_RESOURCE) { if (bar == PCI_ROM_RESOURCE) {
/* /*
* The ROM can fill less space than the BAR, so we start the * The ROM can fill less space than the BAR, so we start the
...@@ -184,13 +193,17 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, ...@@ -184,13 +193,17 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
* filling large ROM BARs much faster. * filling large ROM BARs much faster.
*/ */
io = pci_map_rom(pdev, &x_start); io = pci_map_rom(pdev, &x_start);
if (!io) if (!io) {
return -ENOMEM; done = -ENOMEM;
goto out;
}
x_end = end; x_end = end;
} else { } else {
int ret = vfio_pci_setup_barmap(vdev, bar); int ret = vfio_pci_setup_barmap(vdev, bar);
if (ret) if (ret) {
return ret; done = ret;
goto out;
}
io = vdev->barmap[bar]; io = vdev->barmap[bar];
} }
...@@ -207,6 +220,9 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, ...@@ -207,6 +220,9 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
if (bar == PCI_ROM_RESOURCE) if (bar == PCI_ROM_RESOURCE)
pci_unmap_rom(pdev, io); pci_unmap_rom(pdev, io);
out:
if (res->flags & IORESOURCE_MEM)
up_read(&vdev->memory_lock);
return done; return done;
} }
......
...@@ -317,6 +317,32 @@ static int put_pfn(unsigned long pfn, int prot) ...@@ -317,6 +317,32 @@ static int put_pfn(unsigned long pfn, int prot)
return 0; return 0;
} }
/*
 * Look up the PFN backing @vaddr in @vma, faulting the address in via
 * fixup_user_fault() when the first follow_pfn() lookup fails.
 *
 * @write_fault selects whether FAULT_FLAG_WRITE is added to the fault
 * request.
 *
 * Returns 0 with *@pfn set on success, -EAGAIN when fixup_user_fault()
 * sets its "unlocked" output (the caller is expected to redo its vma
 * lookup), or the error from fixup_user_fault() / the second follow_pfn().
 */
static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
			    unsigned long vaddr, unsigned long *pfn,
			    bool write_fault)
{
	unsigned int fault_flags = FAULT_FLAG_REMOTE;
	bool unlocked = false;
	int err;

	/* Fast path: the translation is already present. */
	if (!follow_pfn(vma, vaddr, pfn))
		return 0;

	if (write_fault)
		fault_flags |= FAULT_FLAG_WRITE;

	err = fixup_user_fault(NULL, mm, vaddr, fault_flags, &unlocked);

	/* "unlocked" takes priority over the fault result, as before. */
	if (unlocked)
		return -EAGAIN;

	if (err)
		return err;

	/* The fault succeeded; the lookup should now resolve. */
	return follow_pfn(vma, vaddr, pfn);
}
static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
int prot, unsigned long *pfn) int prot, unsigned long *pfn)
{ {
...@@ -339,12 +365,16 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, ...@@ -339,12 +365,16 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
vaddr = untagged_addr(vaddr); vaddr = untagged_addr(vaddr);
retry:
vma = find_vma_intersection(mm, vaddr, vaddr + 1); vma = find_vma_intersection(mm, vaddr, vaddr + 1);
if (vma && vma->vm_flags & VM_PFNMAP) { if (vma && vma->vm_flags & VM_PFNMAP) {
if (!follow_pfn(vma, vaddr, pfn) && ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
is_invalid_reserved_pfn(*pfn)) if (ret == -EAGAIN)
ret = 0; goto retry;
if (!ret && !is_invalid_reserved_pfn(*pfn))
ret = -EFAULT;
} }
done: done:
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment