Commit 17688215 authored by Linus Torvalds

Merge tag 'vfio-v5.19-rc1' of https://github.com/awilliam/linux-vfio

Pull vfio updates from Alex Williamson:

 - Improvements to mlx5 vfio-pci variant driver, including support for
   parallel migration per PF (Yishai Hadas)

 - Remove redundant iommu_present() check (Robin Murphy)

 - Ongoing refactoring to consolidate the VFIO driver facing API to use
   vfio_device (Jason Gunthorpe)

 - Use drvdata to store vfio_device among all vfio-pci and variant
   drivers (Jason Gunthorpe)

 - Remove redundant code now that IOMMU core manages group DMA ownership
   (Jason Gunthorpe)

 - Remove vfio_group from external API handling struct file ownership
   (Jason Gunthorpe)

 - Correct typo in uapi comments (Thomas Huth)

 - Fix coccicheck detected deadlock (Wan Jiabing)

 - Use rwsem to remove races and simplify code around container and kvm
   association to groups (Jason Gunthorpe)

 - Harden access to devices in low power states and use runtime PM to
   enable d3cold support for unused devices (Abhishek Sahu)

 - Fix dma_owner handling of fake IOMMU groups (Jason Gunthorpe)

 - Set driver_managed_dma on vfio-pci variant drivers (Jason Gunthorpe)

 - Pass KVM pointer directly rather than via notifier (Matthew Rosato)

* tag 'vfio-v5.19-rc1' of https://github.com/awilliam/linux-vfio: (38 commits)
  vfio: remove VFIO_GROUP_NOTIFY_SET_KVM
  vfio/pci: Add driver_managed_dma to the new vfio_pci drivers
  vfio: Do not manipulate iommu dma_owner for fake iommu groups
  vfio/pci: Move the unused device into low power state with runtime PM
  vfio/pci: Virtualize PME related registers bits and initialize to zero
  vfio/pci: Change the PF power state to D0 before enabling VFs
  vfio/pci: Invalidate mmaps and block the access in D3hot power state
  vfio: Change struct vfio_group::container_users to a non-atomic int
  vfio: Simplify the life cycle of the group FD
  vfio: Fully lock struct vfio_group::container
  vfio: Split up vfio_group_get_device_fd()
  vfio: Change struct vfio_group::opened from an atomic to bool
  vfio: Add missing locking for struct vfio_group::kvm
  kvm/vfio: Fix potential deadlock problem in vfio
  include/uapi/linux/vfio.h: Fix trivial typo - _IORW should be _IOWR instead
  vfio/pci: Use the struct file as the handle not the vfio_group
  kvm/vfio: Remove vfio_group from kvm
  vfio: Change vfio_group_set_kvm() to vfio_file_set_kvm()
  vfio: Change vfio_external_check_extension() to vfio_file_enforced_coherent()
  vfio: Remove vfio_external_group_match_file()
  ...
parents 8171acb8 421cfe65
@@ -262,10 +262,10 @@ Translation APIs for Mediated Devices
 The following APIs are provided for translating user pfn to host pfn in a VFIO
 driver::
-extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
+int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn,
 int npage, int prot, unsigned long *phys_pfn);
-extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
+int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
 int npage);
 These functions call back into the back-end IOMMU module by using the pin_pages
...
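As an illustration of the reworked interface above, a minimal sketch (not code from this series; my_pin_one_gfn and the gfn/hpfn variables are made-up names) of a driver passing its own vfio_device rather than a struct device:

#include <linux/vfio.h>
#include <linux/iommu.h>

/* Pin a single guest pfn for DMA and immediately release it again. */
static int my_pin_one_gfn(struct vfio_device *vdev, unsigned long gfn)
{
	unsigned long hpfn;
	int ret;

	ret = vfio_pin_pages(vdev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &hpfn);
	if (ret != 1)		/* returns the number of pages pinned */
		return ret < 0 ? ret : -EFAULT;

	/* ... program the device with hpfn ... */

	vfio_unpin_pages(vdev, &gfn, 1);
	return 0;
}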
@@ -51,7 +51,7 @@ static int preallocated_oos_pages = 8192;
 static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
 {
-struct kvm *kvm = vgpu->kvm;
+struct kvm *kvm = vgpu->vfio_device.kvm;
 int idx;
 bool ret;
@@ -1185,7 +1185,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
 if (!vgpu->attached)
 return -EINVAL;
-pfn = gfn_to_pfn(vgpu->kvm, ops->get_pfn(entry));
+pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
 if (is_error_noslot_pfn(pfn))
 return -EINVAL;
 return PageTransHuge(pfn_to_page(pfn));
...
@@ -227,11 +227,7 @@ struct intel_vgpu {
 struct mutex cache_lock;
 struct notifier_block iommu_notifier;
-struct notifier_block group_notifier;
-struct kvm *kvm;
-struct work_struct release_work;
 atomic_t released;
-struct vfio_group *vfio_group;
 struct kvm_page_track_notifier_node track_node;
 #define NR_BKT (1 << 18)
@@ -732,7 +728,7 @@ static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa,
 {
 if (!vgpu->attached)
 return -ESRCH;
-return vfio_dma_rw(vgpu->vfio_group, gpa, buf, len, false);
+return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, false);
 }
 /**
@@ -750,7 +746,7 @@ static inline int intel_gvt_write_gpa(struct intel_vgpu *vgpu,
 {
 if (!vgpu->attached)
 return -ESRCH;
-return vfio_dma_rw(vgpu->vfio_group, gpa, buf, len, true);
+return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, true);
 }
 void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu);
...
@@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
 }
 }
-static void intel_vgpu_release_work(struct work_struct *work);
 static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 unsigned long size)
 {
@@ -243,7 +241,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 for (npage = 0; npage < total_pages; npage++) {
 unsigned long cur_gfn = gfn + npage;
-ret = vfio_group_unpin_pages(vgpu->vfio_group, &cur_gfn, 1);
+ret = vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1);
 drm_WARN_ON(&i915->drm, ret != 1);
 }
 }
@@ -266,8 +264,8 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 unsigned long cur_gfn = gfn + npage;
 unsigned long pfn;
-ret = vfio_group_pin_pages(vgpu->vfio_group, &cur_gfn, 1,
+ret = vfio_pin_pages(&vgpu->vfio_device, &cur_gfn, 1,
 IOMMU_READ | IOMMU_WRITE, &pfn);
 if (ret != 1) {
 gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
 cur_gfn, ret);
@@ -761,23 +759,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 return NOTIFY_OK;
 }
-static int intel_vgpu_group_notifier(struct notifier_block *nb,
-unsigned long action, void *data)
-{
-struct intel_vgpu *vgpu =
-container_of(nb, struct intel_vgpu, group_notifier);
-/* the only action we care about */
-if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
-vgpu->kvm = data;
-if (!data)
-schedule_work(&vgpu->release_work);
-}
-return NOTIFY_OK;
-}
 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
 {
 struct intel_vgpu *itr;
@@ -789,7 +770,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
 if (!itr->attached)
 continue;
-if (vgpu->kvm == itr->kvm) {
+if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
 ret = true;
 goto out;
 }
@@ -804,61 +785,44 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 unsigned long events;
 int ret;
-struct vfio_group *vfio_group;
 vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
-vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;
 events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-ret = vfio_register_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY, &events,
+ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
 &vgpu->iommu_notifier);
 if (ret != 0) {
 gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
 ret);
 goto out;
 }
-events = VFIO_GROUP_NOTIFY_SET_KVM;
-ret = vfio_register_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY, &events,
-&vgpu->group_notifier);
-if (ret != 0) {
-gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
-ret);
-goto undo_iommu;
-}
-vfio_group =
-vfio_group_get_external_user_from_dev(vgpu->vfio_device.dev);
-if (IS_ERR_OR_NULL(vfio_group)) {
-ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group);
-gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n");
-goto undo_register;
-}
-vgpu->vfio_group = vfio_group;
 ret = -EEXIST;
 if (vgpu->attached)
-goto undo_group;
+goto undo_iommu;
 ret = -ESRCH;
-if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
+if (!vgpu->vfio_device.kvm ||
+vgpu->vfio_device.kvm->mm != current->mm) {
 gvt_vgpu_err("KVM is required to use Intel vGPU\n");
-goto undo_group;
+goto undo_iommu;
 }
+kvm_get_kvm(vgpu->vfio_device.kvm);
 ret = -EEXIST;
 if (__kvmgt_vgpu_exist(vgpu))
-goto undo_group;
+goto undo_iommu;
 vgpu->attached = true;
-kvm_get_kvm(vgpu->kvm);
 kvmgt_protect_table_init(vgpu);
 gvt_cache_init(vgpu);
 vgpu->track_node.track_write = kvmgt_page_track_write;
 vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
-kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
+kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
+&vgpu->track_node);
 debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
 &vgpu->nr_cache_entries);
@@ -868,17 +832,9 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 atomic_set(&vgpu->released, 0);
 return 0;
-undo_group:
-vfio_group_put_external_user(vgpu->vfio_group);
-vgpu->vfio_group = NULL;
-undo_register:
-vfio_unregister_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY,
-&vgpu->group_notifier);
 undo_iommu:
-vfio_unregister_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY,
+vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
 &vgpu->iommu_notifier);
 out:
 return ret;
 }
@@ -894,8 +850,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
 }
 }
-static void __intel_vgpu_release(struct intel_vgpu *vgpu)
+static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
 {
+struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
 int ret;
@@ -907,41 +864,24 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 intel_gvt_release_vgpu(vgpu);
-ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_IOMMU_NOTIFY,
+ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
 &vgpu->iommu_notifier);
 drm_WARN(&i915->drm, ret,
 "vfio_unregister_notifier for iommu failed: %d\n", ret);
-ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_GROUP_NOTIFY,
-&vgpu->group_notifier);
-drm_WARN(&i915->drm, ret,
-"vfio_unregister_notifier for group failed: %d\n", ret);
 debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
-kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
-kvm_put_kvm(vgpu->kvm);
+kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
+&vgpu->track_node);
 kvmgt_protect_table_destroy(vgpu);
 gvt_cache_destroy(vgpu);
 intel_vgpu_release_msi_eventfd_ctx(vgpu);
-vfio_group_put_external_user(vgpu->vfio_group);
-vgpu->kvm = NULL;
 vgpu->attached = false;
-}
-static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
-{
-__intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
-}
-static void intel_vgpu_release_work(struct work_struct *work)
-{
-struct intel_vgpu *vgpu =
-container_of(work, struct intel_vgpu, release_work);
-__intel_vgpu_release(vgpu);
+if (vgpu->vfio_device.kvm)
+kvm_put_kvm(vgpu->vfio_device.kvm);
 }
 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
@@ -1690,7 +1630,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
 return PTR_ERR(vgpu);
 }
-INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
 vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
 &intel_vgpu_dev_ops);
@@ -1728,7 +1667,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = {
 int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 {
-struct kvm *kvm = info->kvm;
+struct kvm *kvm = info->vfio_device.kvm;
 struct kvm_memory_slot *slot;
 int idx;
@@ -1758,7 +1697,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
 {
-struct kvm *kvm = info->kvm;
+struct kvm *kvm = info->vfio_device.kvm;
 struct kvm_memory_slot *slot;
 int idx;
...
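The kvmgt changes above follow the new core contract: the KVM pointer is published in struct vfio_device before ->open_device() runs (it originates from vfio_file_set_kvm()), so the driver no longer registers a VFIO_GROUP_NOTIFY_SET_KVM notifier. A minimal sketch of that pattern (simplified, not the exact GVT code; the my_mdev_* names are illustrative):

#include <linux/vfio.h>
#include <linux/kvm_host.h>

static int my_mdev_open_device(struct vfio_device *vdev)
{
	/* vdev->kvm is filled in by the vfio core before this callback */
	if (!vdev->kvm || vdev->kvm->mm != current->mm)
		return -ESRCH;

	kvm_get_kvm(vdev->kvm);		/* hold a reference while opened */
	/* ... driver specific setup ... */
	return 0;
}

static void my_mdev_close_device(struct vfio_device *vdev)
{
	/* ... driver specific teardown ... */
	kvm_put_kvm(vdev->kvm);
}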
@@ -87,6 +87,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 enable_vfs_hca:
 num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs);
 for (vf = 0; vf < num_vfs; vf++) {
+/* Notify the VF before its enablement to let it set
+ * some stuff.
+ */
+blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
+MLX5_PF_NOTIFY_ENABLE_VF, dev);
 err = mlx5_core_enable_hca(dev, vf + 1);
 if (err) {
 mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
@@ -127,6 +132,11 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
 for (vf = num_vfs - 1; vf >= 0; vf--) {
 if (!sriov->vfs_ctx[vf].enabled)
 continue;
+/* Notify the VF before its disablement to let it clean
+ * some resources.
+ */
+blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
+MLX5_PF_NOTIFY_DISABLE_VF, dev);
 err = mlx5_core_disable_hca(dev, vf + 1);
 if (err) {
 mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
@@ -257,7 +267,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
 {
 struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 struct pci_dev *pdev = dev->pdev;
-int total_vfs;
+int total_vfs, i;
 if (!mlx5_core_is_pf(dev))
 return 0;
@@ -269,6 +279,9 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
 if (!sriov->vfs_ctx)
 return -ENOMEM;
+for (i = 0; i < total_vfs; i++)
+BLOCKING_INIT_NOTIFIER_HEAD(&sriov->vfs_ctx[i].notifier);
 return 0;
 }
@@ -281,3 +294,53 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
 kfree(sriov->vfs_ctx);
 }
+/**
+ * mlx5_sriov_blocking_notifier_unregister - Unregister a VF from
+ * a notification block chain.
+ *
+ * @mdev: The mlx5 core device.
+ * @vf_id: The VF id.
+ * @nb: The notifier block to be unregistered.
+ */
+void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev,
+int vf_id,
+struct notifier_block *nb)
+{
+struct mlx5_vf_context *vfs_ctx;
+struct mlx5_core_sriov *sriov;
+sriov = &mdev->priv.sriov;
+if (WARN_ON(vf_id < 0 || vf_id >= sriov->num_vfs))
+return;
+vfs_ctx = &sriov->vfs_ctx[vf_id];
+blocking_notifier_chain_unregister(&vfs_ctx->notifier, nb);
+}
+EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_unregister);
+/**
+ * mlx5_sriov_blocking_notifier_register - Register a VF notification
+ * block chain.
+ *
+ * @mdev: The mlx5 core device.
+ * @vf_id: The VF id.
+ * @nb: The notifier block to be called upon the VF events.
+ *
+ * Returns 0 on success or an error code.
+ */
+int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev,
+int vf_id,
+struct notifier_block *nb)
+{
+struct mlx5_vf_context *vfs_ctx;
+struct mlx5_core_sriov *sriov;
+sriov = &mdev->priv.sriov;
+if (vf_id < 0 || vf_id >= sriov->num_vfs)
+return -EINVAL;
+vfs_ctx = &sriov->vfs_ctx[vf_id];
+return blocking_notifier_chain_register(&vfs_ctx->notifier, nb);
+}
+EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_register);
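For context on how the two exported helpers above are meant to be consumed, here is a sketch only (the in-tree user is the mlx5 vfio-pci variant driver; my_vf_event, my_nb, my_attach and my_detach are illustrative names):

#include <linux/notifier.h>
#include <linux/mlx5/driver.h>

static int my_vf_event(struct notifier_block *nb, unsigned long event,
		       void *data)
{
	switch (event) {
	case MLX5_PF_NOTIFY_ENABLE_VF:
		/* the PF is about to enable this VF's HCA */
		break;
	case MLX5_PF_NOTIFY_DISABLE_VF:
		/* the PF is about to disable this VF; quiesce any work */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_nb = { .notifier_call = my_vf_event };

static int my_attach(struct mlx5_core_dev *mdev, int vf_id)
{
	return mlx5_sriov_blocking_notifier_register(mdev, vf_id, &my_nb);
}

static void my_detach(struct mlx5_core_dev *mdev, int vf_id)
{
	mlx5_sriov_blocking_notifier_unregister(mdev, vf_id, &my_nb);
}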
@@ -16,6 +16,7 @@
 #include <asm/idals.h>
 #include "vfio_ccw_cp.h"
+#include "vfio_ccw_private.h"
 struct pfn_array {
 /* Starting guest physical I/O address. */
@@ -98,17 +99,17 @@ static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len)
 * If the pin request partially succeeds, or fails completely,
 * all pages are left unpinned and a negative error value is returned.
 */
-static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
+static int pfn_array_pin(struct pfn_array *pa, struct vfio_device *vdev)
 {
 int ret = 0;
-ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
+ret = vfio_pin_pages(vdev, pa->pa_iova_pfn, pa->pa_nr,
 IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
 if (ret < 0) {
 goto err_out;
 } else if (ret > 0 && ret != pa->pa_nr) {
-vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret);
+vfio_unpin_pages(vdev, pa->pa_iova_pfn, ret);
 ret = -EINVAL;
 goto err_out;
 }
@@ -122,11 +123,11 @@ static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
 }
 /* Unpin the pages before releasing the memory. */
-static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
+static void pfn_array_unpin_free(struct pfn_array *pa, struct vfio_device *vdev)
 {
 /* Only unpin if any pages were pinned to begin with */
 if (pa->pa_nr)
-vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
+vfio_unpin_pages(vdev, pa->pa_iova_pfn, pa->pa_nr);
 pa->pa_nr = 0;
 kfree(pa->pa_iova_pfn);
 }
@@ -190,8 +191,7 @@ static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
 * Within the domain (@mdev), copy @n bytes from a guest physical
 * address (@iova) to a host physical address (@to).
 */
-static long copy_from_iova(struct device *mdev,
-void *to, u64 iova,
+static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova,
 unsigned long n)
 {
 struct pfn_array pa = {0};
@@ -203,9 +203,9 @@ static long copy_from_iova(struct device *mdev,
 if (ret < 0)
 return ret;
-ret = pfn_array_pin(&pa, mdev);
+ret = pfn_array_pin(&pa, vdev);
 if (ret < 0) {
-pfn_array_unpin_free(&pa, mdev);
+pfn_array_unpin_free(&pa, vdev);
 return ret;
 }
@@ -226,7 +226,7 @@ static long copy_from_iova(struct device *mdev,
 break;
 }
-pfn_array_unpin_free(&pa, mdev);
+pfn_array_unpin_free(&pa, vdev);
 return l;
 }
@@ -423,11 +423,13 @@ static int ccwchain_loop_tic(struct ccwchain *chain,
 static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
 {
+struct vfio_device *vdev =
+&container_of(cp, struct vfio_ccw_private, cp)->vdev;
 struct ccwchain *chain;
 int len, ret;
 /* Copy 2K (the most we support today) of possible CCWs */
-len = copy_from_iova(cp->mdev, cp->guest_cp, cda,
+len = copy_from_iova(vdev, cp->guest_cp, cda,
 CCWCHAIN_LEN_MAX * sizeof(struct ccw1));
 if (len)
 return len;
@@ -508,6 +510,8 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
 int idx,
 struct channel_program *cp)
 {
+struct vfio_device *vdev =
+&container_of(cp, struct vfio_ccw_private, cp)->vdev;
 struct ccw1 *ccw;
 struct pfn_array *pa;
 u64 iova;
@@ -526,7 +530,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
 if (ccw_is_idal(ccw)) {
 /* Read first IDAW to see if it's 4K-aligned or not. */
 /* All subsequent IDAws will be 4K-aligned. */
-ret = copy_from_iova(cp->mdev, &iova, ccw->cda, sizeof(iova));
+ret = copy_from_iova(vdev, &iova, ccw->cda, sizeof(iova));
 if (ret)
 return ret;
 } else {
@@ -555,7 +559,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
 if (ccw_is_idal(ccw)) {
 /* Copy guest IDAL into host IDAL */
-ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idal_len);
+ret = copy_from_iova(vdev, idaws, ccw->cda, idal_len);
 if (ret)
 goto out_unpin;
@@ -574,7 +578,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
 }
 if (ccw_does_data_transfer(ccw)) {
-ret = pfn_array_pin(pa, cp->mdev);
+ret = pfn_array_pin(pa, vdev);
 if (ret < 0)
 goto out_unpin;
 } else {
@@ -590,7 +594,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
 return 0;
 out_unpin:
-pfn_array_unpin_free(pa, cp->mdev);
+pfn_array_unpin_free(pa, vdev);
 out_free_idaws:
 kfree(idaws);
 out_init:
@@ -632,8 +636,10 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
 * Returns:
 * %0 on success and a negative error value on failure.
 */
-int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
+int cp_init(struct channel_program *cp, union orb *orb)
 {
+struct vfio_device *vdev =
+&container_of(cp, struct vfio_ccw_private, cp)->vdev;
 /* custom ratelimit used to avoid flood during guest IPL */
 static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
 int ret;
@@ -650,11 +656,12 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
 * the problem if something does break.
 */
 if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
-dev_warn(mdev, "Prefetching channel program even though prefetch not specified in ORB");
+dev_warn(
+vdev->dev,
+"Prefetching channel program even though prefetch not specified in ORB");
 INIT_LIST_HEAD(&cp->ccwchain_list);
 memcpy(&cp->orb, orb, sizeof(*orb));
-cp->mdev = mdev;
 /* Build a ccwchain for the first CCW segment */
 ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
@@ -682,6 +689,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
 */
 void cp_free(struct channel_program *cp)
 {
+struct vfio_device *vdev =
+&container_of(cp, struct vfio_ccw_private, cp)->vdev;
 struct ccwchain *chain, *temp;
 int i;
@@ -691,7 +700,7 @@ void cp_free(struct channel_program *cp)
 cp->initialized = false;
 list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
 for (i = 0; i < chain->ch_len; i++) {
-pfn_array_unpin_free(chain->ch_pa + i, cp->mdev);
+pfn_array_unpin_free(chain->ch_pa + i, vdev);
 ccwchain_cda_free(chain, i);
 }
 ccwchain_free(chain);
...
@@ -37,13 +37,11 @@
 struct channel_program {
 struct list_head ccwchain_list;
 union orb orb;
-struct device *mdev;
 bool initialized;
 struct ccw1 *guest_cp;
 };
-extern int cp_init(struct channel_program *cp, struct device *mdev,
-union orb *orb);
+extern int cp_init(struct channel_program *cp, union orb *orb);
 extern void cp_free(struct channel_program *cp);
 extern int cp_prefetch(struct channel_program *cp);
 extern union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm);
...
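The header change above drops the cached struct device because channel_program is embedded in vfio_ccw_private right next to the vfio_device, so the device can always be recovered with container_of(). A simplified illustration of that pattern (the struct and function names here are made up, not the vfio-ccw ones):

#include <linux/container_of.h>
#include <linux/vfio.h>

struct my_channel_program {
	/* ... parsed channel program state ... */
	int dummy;
};

struct my_private {
	struct vfio_device vdev;
	struct my_channel_program cp;
};

/* Walk back from the embedded member to the owning vfio_device. */
static struct vfio_device *my_cp_to_vdev(struct my_channel_program *cp)
{
	return &container_of(cp, struct my_private, cp)->vdev;
}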
@@ -262,8 +262,7 @@ static void fsm_io_request(struct vfio_ccw_private *private,
 errstr = "transport mode";
 goto err_out;
 }
-io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev),
-orb);
+io_region->ret_code = cp_init(&private->cp, orb);
 if (io_region->ret_code) {
 VFIO_CCW_MSG_EVENT(2,
 "%pUl (%x.%x.%04x): cp_init=%d\n",
...
@@ -183,7 +183,7 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
 private->nb.notifier_call = vfio_ccw_mdev_notifier;
-ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
+ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY,
 &events, &private->nb);
 if (ret)
 return ret;
@@ -204,8 +204,7 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
 out_unregister:
 vfio_ccw_unregister_dev_regions(private);
-vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
-&private->nb);
+vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
 return ret;
 }
@@ -223,7 +222,7 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev)
 cp_free(&private->cp);
 vfio_ccw_unregister_dev_regions(private);
-vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY, &private->nb);
+vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
 }
 static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private,
...
@@ -124,8 +124,7 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
 q->saved_isc = VFIO_AP_ISC_INVALID;
 }
 if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
-vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
-&q->saved_pfn, 1);
+vfio_unpin_pages(&q->matrix_mdev->vdev, &q->saved_pfn, 1);
 q->saved_pfn = 0;
 }
 }
@@ -258,7 +257,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 return status;
 }
-ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
+ret = vfio_pin_pages(&q->matrix_mdev->vdev, &g_pfn, 1,
 IOMMU_READ | IOMMU_WRITE, &h_pfn);
 switch (ret) {
 case 1:
@@ -301,7 +300,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 break;
 case AP_RESPONSE_OTHERWISE_CHANGED:
 /* We could not modify IRQ setings: clear new configuration */
-vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
+vfio_unpin_pages(&q->matrix_mdev->vdev, &g_pfn, 1);
 kvm_s390_gisc_unregister(kvm, isc);
 break;
 default:
@@ -1250,7 +1249,7 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
 struct vfio_iommu_type1_dma_unmap *unmap = data;
 unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
-vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
+vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
 return NOTIFY_OK;
 }
@@ -1285,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
 }
 }
-static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
-unsigned long action, void *data)
-{
-int notify_rc = NOTIFY_OK;
-struct ap_matrix_mdev *matrix_mdev;
-if (action != VFIO_GROUP_NOTIFY_SET_KVM)
-return NOTIFY_OK;
-matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
-if (!data)
-vfio_ap_mdev_unset_kvm(matrix_mdev);
-else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
-notify_rc = NOTIFY_DONE;
-return notify_rc;
-}
 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
 {
 struct device *dev;
@@ -1403,25 +1383,23 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
 unsigned long events;
 int ret;
-matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
-events = VFIO_GROUP_NOTIFY_SET_KVM;
-ret = vfio_register_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
-&events, &matrix_mdev->group_notifier);
+if (!vdev->kvm)
+return -EINVAL;
+ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
 if (ret)
 return ret;
 matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
 events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
-&events, &matrix_mdev->iommu_notifier);
+ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
+&matrix_mdev->iommu_notifier);
 if (ret)
-goto out_unregister_group;
+goto err_kvm;
 return 0;
-out_unregister_group:
-vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
-&matrix_mdev->group_notifier);
+err_kvm:
+vfio_ap_mdev_unset_kvm(matrix_mdev);
 return ret;
 }
@@ -1430,10 +1408,8 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
 struct ap_matrix_mdev *matrix_mdev =
 container_of(vdev, struct ap_matrix_mdev, vdev);
-vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
+vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
 &matrix_mdev->iommu_notifier);
-vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
-&matrix_mdev->group_notifier);
 vfio_ap_mdev_unset_kvm(matrix_mdev);
 }
...
@@ -81,8 +81,6 @@ struct ap_matrix {
 * @node: allows the ap_matrix_mdev struct to be added to a list
 * @matrix: the adapters, usage domains and control domains assigned to the
 * mediated matrix device.
- * @group_notifier: notifier block used for specifying callback function for
- * handling the VFIO_GROUP_NOTIFY_SET_KVM event
 * @iommu_notifier: notifier block used for specifying callback function for
 * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
 * @kvm: the struct holding guest's state
@@ -94,7 +92,6 @@ struct ap_matrix_mdev {
 struct vfio_device vdev;
 struct list_head node;
 struct ap_matrix matrix;
-struct notifier_block group_notifier;
 struct notifier_block iommu_notifier;
 struct kvm *kvm;
 crypto_hook pqap_hook;
...
@@ -337,6 +337,14 @@ static int vf_qm_cache_wb(struct hisi_qm *qm)
 return 0;
 }
+static struct hisi_acc_vf_core_device *hssi_acc_drvdata(struct pci_dev *pdev)
+{
+struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
+return container_of(core_device, struct hisi_acc_vf_core_device,
+core_device);
+}
 static void vf_qm_fun_reset(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 struct hisi_qm *qm)
 {
@@ -962,7 +970,7 @@ hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
 static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
 {
-struct hisi_acc_vf_core_device *hisi_acc_vdev = dev_get_drvdata(&pdev->dev);
+struct hisi_acc_vf_core_device *hisi_acc_vdev = hssi_acc_drvdata(pdev);
 if (hisi_acc_vdev->core_device.vdev.migration_flags !=
 VFIO_MIGRATION_STOP_COPY)
@@ -1274,11 +1282,10 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device
 &hisi_acc_vfio_pci_ops);
 }
+dev_set_drvdata(&pdev->dev, &hisi_acc_vdev->core_device);
 ret = vfio_pci_core_register_device(&hisi_acc_vdev->core_device);
 if (ret)
 goto out_free;
-dev_set_drvdata(&pdev->dev, hisi_acc_vdev);
 return 0;
 out_free:
@@ -1289,7 +1296,7 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device
 static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev)
 {
-struct hisi_acc_vf_core_device *hisi_acc_vdev = dev_get_drvdata(&pdev->dev);
+struct hisi_acc_vf_core_device *hisi_acc_vdev = hssi_acc_drvdata(pdev);
 vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device);
 vfio_pci_core_uninit_device(&hisi_acc_vdev->core_device);
@@ -1316,6 +1323,7 @@ static struct pci_driver hisi_acc_vfio_pci_driver = {
 .probe = hisi_acc_vfio_pci_probe,
 .remove = hisi_acc_vfio_pci_remove,
 .err_handler = &hisi_acc_vf_err_handlers,
+.driver_managed_dma = true,
 };
 module_pci_driver(hisi_acc_vfio_pci_driver);
...
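The hunks above switch hisi_acc to the drvdata convention used across the vfio-pci variant drivers in this merge: probe stores the embedded vfio_pci_core_device as drvdata before registering, and other PCI callbacks recover the wrapping structure with container_of(). Sketched generically (the my_* names are illustrative, not from any driver):

#include <linux/pci.h>
#include <linux/vfio_pci_core.h>

struct my_vf_core_device {
	struct vfio_pci_core_device core_device;
	/* ... driver specific migration state ... */
};

/* Every PCI callback can get back to the wrapper from the pci_dev. */
static struct my_vf_core_device *my_drvdata(struct pci_dev *pdev)
{
	struct vfio_pci_core_device *core = dev_get_drvdata(&pdev->dev);

	return container_of(core, struct my_vf_core_device, core_device);
}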
This diff is collapsed.
@@ -7,12 +7,23 @@
 #define MLX5_VFIO_CMD_H
 #include <linux/kernel.h>
+#include <linux/vfio_pci_core.h>
 #include <linux/mlx5/driver.h>
+struct mlx5vf_async_data {
+struct mlx5_async_work cb_work;
+struct work_struct work;
+int status;
+u32 pdn;
+u32 mkey;
+void *out;
+};
 struct mlx5_vf_migration_file {
 struct file *filp;
 struct mutex lock;
-bool disabled;
+u8 disabled:1;
+u8 is_err:1;
 struct sg_append_table table;
 size_t total_length;
@@ -22,15 +33,42 @@ struct mlx5_vf_migration_file {
 struct scatterlist *last_offset_sg;
 unsigned int sg_last_entry;
 unsigned long last_offset;
+struct mlx5vf_pci_core_device *mvdev;
+wait_queue_head_t poll_wait;
+struct mlx5_async_ctx async_ctx;
+struct mlx5vf_async_data async_data;
+};
+struct mlx5vf_pci_core_device {
+struct vfio_pci_core_device core_device;
+int vf_id;
+u16 vhca_id;
+u8 migrate_cap:1;
+u8 deferred_reset:1;
+u8 mdev_detach:1;
+/* protect migration state */
+struct mutex state_mutex;
+enum vfio_device_mig_state mig_state;
+/* protect the reset_done flow */
+spinlock_t reset_lock;
+struct mlx5_vf_migration_file *resuming_migf;
+struct mlx5_vf_migration_file *saving_migf;
+struct workqueue_struct *cb_wq;
+struct notifier_block nb;
+struct mlx5_core_dev *mdev;
 };
-int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
-int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
-int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
+int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
+int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
+int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
 size_t *state_size);
-int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id);
-int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev);
+int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 struct mlx5_vf_migration_file *migf);
-int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 struct mlx5_vf_migration_file *migf);
+void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
 #endif /* MLX5_VFIO_CMD_H */
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/vfio.h> #include <linux/vfio.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/vfio_pci_core.h>
#include <linux/anon_inodes.h> #include <linux/anon_inodes.h>
#include "cmd.h" #include "cmd.h"
...@@ -25,19 +24,13 @@ ...@@ -25,19 +24,13 @@
/* Arbitrary to prevent userspace from consuming endless memory */ /* Arbitrary to prevent userspace from consuming endless memory */
#define MAX_MIGRATION_SIZE (512*1024*1024) #define MAX_MIGRATION_SIZE (512*1024*1024)
struct mlx5vf_pci_core_device { static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
struct vfio_pci_core_device core_device; {
u16 vhca_id; struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
u8 migrate_cap:1;
u8 deferred_reset:1; return container_of(core_device, struct mlx5vf_pci_core_device,
/* protect migration state */ core_device);
struct mutex state_mutex; }
enum vfio_device_mig_state mig_state;
/* protect the reset_done flow */
spinlock_t reset_lock;
struct mlx5_vf_migration_file *resuming_migf;
struct mlx5_vf_migration_file *saving_migf;
};
static struct page * static struct page *
mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf, mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
...@@ -149,12 +142,22 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, ...@@ -149,12 +142,22 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
return -ESPIPE; return -ESPIPE;
pos = &filp->f_pos; pos = &filp->f_pos;
if (!(filp->f_flags & O_NONBLOCK)) {
if (wait_event_interruptible(migf->poll_wait,
READ_ONCE(migf->total_length) || migf->is_err))
return -ERESTARTSYS;
}
mutex_lock(&migf->lock); mutex_lock(&migf->lock);
if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(migf->total_length)) {
done = -EAGAIN;
goto out_unlock;
}
if (*pos > migf->total_length) { if (*pos > migf->total_length) {
done = -EINVAL; done = -EINVAL;
goto out_unlock; goto out_unlock;
} }
if (migf->disabled) { if (migf->disabled || migf->is_err) {
done = -ENODEV; done = -ENODEV;
goto out_unlock; goto out_unlock;
} }
...@@ -194,9 +197,28 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, ...@@ -194,9 +197,28 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
return done; return done;
} }
static __poll_t mlx5vf_save_poll(struct file *filp,
struct poll_table_struct *wait)
{
struct mlx5_vf_migration_file *migf = filp->private_data;
__poll_t pollflags = 0;
poll_wait(filp, &migf->poll_wait, wait);
mutex_lock(&migf->lock);
if (migf->disabled || migf->is_err)
pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
else if (READ_ONCE(migf->total_length))
pollflags = EPOLLIN | EPOLLRDNORM;
mutex_unlock(&migf->lock);
return pollflags;
}
static const struct file_operations mlx5vf_save_fops = { static const struct file_operations mlx5vf_save_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.read = mlx5vf_save_read, .read = mlx5vf_save_read,
.poll = mlx5vf_save_poll,
.release = mlx5vf_release_file, .release = mlx5vf_release_file,
.llseek = no_llseek, .llseek = no_llseek,
}; };
...@@ -222,9 +244,11 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) ...@@ -222,9 +244,11 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
stream_open(migf->filp->f_inode, migf->filp); stream_open(migf->filp->f_inode, migf->filp);
mutex_init(&migf->lock); mutex_init(&migf->lock);
init_waitqueue_head(&migf->poll_wait);
ret = mlx5vf_cmd_query_vhca_migration_state( mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx);
mvdev->core_device.pdev, mvdev->vhca_id, &migf->total_length); INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb);
ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
&migf->total_length);
if (ret) if (ret)
goto out_free; goto out_free;
...@@ -233,8 +257,8 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) ...@@ -233,8 +257,8 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
if (ret) if (ret)
goto out_free; goto out_free;
ret = mlx5vf_cmd_save_vhca_state(mvdev->core_device.pdev, migf->mvdev = mvdev;
mvdev->vhca_id, migf); ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
if (ret) if (ret)
goto out_free; goto out_free;
return migf; return migf;
...@@ -339,7 +363,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) ...@@ -339,7 +363,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
return migf; return migf;
} }
static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
{ {
if (mvdev->resuming_migf) { if (mvdev->resuming_migf) {
mlx5vf_disable_fd(mvdev->resuming_migf); mlx5vf_disable_fd(mvdev->resuming_migf);
...@@ -347,6 +371,8 @@ static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) ...@@ -347,6 +371,8 @@ static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
mvdev->resuming_migf = NULL; mvdev->resuming_migf = NULL;
} }
if (mvdev->saving_migf) { if (mvdev->saving_migf) {
mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
cancel_work_sync(&mvdev->saving_migf->async_data.work);
mlx5vf_disable_fd(mvdev->saving_migf); mlx5vf_disable_fd(mvdev->saving_migf);
fput(mvdev->saving_migf->filp); fput(mvdev->saving_migf->filp);
mvdev->saving_migf = NULL; mvdev->saving_migf = NULL;
...@@ -361,8 +387,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, ...@@ -361,8 +387,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
int ret; int ret;
if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) { if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
ret = mlx5vf_cmd_suspend_vhca( ret = mlx5vf_cmd_suspend_vhca(mvdev,
mvdev->core_device.pdev, mvdev->vhca_id,
MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER); MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -370,8 +395,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, ...@@ -370,8 +395,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
} }
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) { if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
ret = mlx5vf_cmd_resume_vhca( ret = mlx5vf_cmd_resume_vhca(mvdev,
mvdev->core_device.pdev, mvdev->vhca_id,
MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER); MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -379,8 +403,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, ...@@ -379,8 +403,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
} }
if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) { if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
ret = mlx5vf_cmd_suspend_vhca( ret = mlx5vf_cmd_suspend_vhca(mvdev,
mvdev->core_device.pdev, mvdev->vhca_id,
MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR); MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -388,8 +411,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, ...@@ -388,8 +411,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
} }
if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) { if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
ret = mlx5vf_cmd_resume_vhca( ret = mlx5vf_cmd_resume_vhca(mvdev,
mvdev->core_device.pdev, mvdev->vhca_id,
MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR); MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -424,8 +446,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, ...@@ -424,8 +446,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
} }
if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
ret = mlx5vf_cmd_load_vhca_state(mvdev->core_device.pdev, ret = mlx5vf_cmd_load_vhca_state(mvdev,
mvdev->vhca_id,
mvdev->resuming_migf); mvdev->resuming_migf);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -444,7 +465,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, ...@@ -444,7 +465,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
* This function is called in all state_mutex unlock cases to * This function is called in all state_mutex unlock cases to
* handle a 'deferred_reset' if exists. * handle a 'deferred_reset' if exists.
*/ */
static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev) void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
{ {
again: again:
spin_lock(&mvdev->reset_lock); spin_lock(&mvdev->reset_lock);
...@@ -505,7 +526,7 @@ static int mlx5vf_pci_get_device_state(struct vfio_device *vdev, ...@@ -505,7 +526,7 @@ static int mlx5vf_pci_get_device_state(struct vfio_device *vdev,
static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev) static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev)
{ {
struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev); struct mlx5vf_pci_core_device *mvdev = mlx5vf_drvdata(pdev);
if (!mvdev->migrate_cap) if (!mvdev->migrate_cap)
return; return;
...@@ -532,34 +553,16 @@ static int mlx5vf_pci_open_device(struct vfio_device *core_vdev) ...@@ -532,34 +553,16 @@ static int mlx5vf_pci_open_device(struct vfio_device *core_vdev)
struct mlx5vf_pci_core_device *mvdev = container_of( struct mlx5vf_pci_core_device *mvdev = container_of(
core_vdev, struct mlx5vf_pci_core_device, core_device.vdev); core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);
struct vfio_pci_core_device *vdev = &mvdev->core_device; struct vfio_pci_core_device *vdev = &mvdev->core_device;
int vf_id;
int ret; int ret;
ret = vfio_pci_core_enable(vdev); ret = vfio_pci_core_enable(vdev);
if (ret) if (ret)
return ret; return ret;
if (!mvdev->migrate_cap) { if (mvdev->migrate_cap)
vfio_pci_core_finish_enable(vdev); mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
return 0;
}
vf_id = pci_iov_vf_id(vdev->pdev);
if (vf_id < 0) {
ret = vf_id;
goto out_disable;
}
ret = mlx5vf_cmd_get_vhca_id(vdev->pdev, vf_id + 1, &mvdev->vhca_id);
if (ret)
goto out_disable;
mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
vfio_pci_core_finish_enable(vdev); vfio_pci_core_finish_enable(vdev);
return 0; return 0;
out_disable:
vfio_pci_core_disable(vdev);
return ret;
} }
static void mlx5vf_pci_close_device(struct vfio_device *core_vdev) static void mlx5vf_pci_close_device(struct vfio_device *core_vdev)
...@@ -596,32 +599,15 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev, ...@@ -596,32 +599,15 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev,
if (!mvdev) if (!mvdev)
return -ENOMEM; return -ENOMEM;
vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops); vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops);
mlx5vf_cmd_set_migratable(mvdev);
if (pdev->is_virtfn) { dev_set_drvdata(&pdev->dev, &mvdev->core_device);
struct mlx5_core_dev *mdev =
mlx5_vf_get_core_dev(pdev);
if (mdev) {
if (MLX5_CAP_GEN(mdev, migration)) {
mvdev->migrate_cap = 1;
mvdev->core_device.vdev.migration_flags =
VFIO_MIGRATION_STOP_COPY |
VFIO_MIGRATION_P2P;
mutex_init(&mvdev->state_mutex);
spin_lock_init(&mvdev->reset_lock);
}
mlx5_vf_put_core_dev(mdev);
}
}
ret = vfio_pci_core_register_device(&mvdev->core_device); ret = vfio_pci_core_register_device(&mvdev->core_device);
if (ret) if (ret)
goto out_free; goto out_free;
dev_set_drvdata(&pdev->dev, mvdev);
return 0; return 0;
out_free: out_free:
mlx5vf_cmd_remove_migratable(mvdev);
vfio_pci_core_uninit_device(&mvdev->core_device); vfio_pci_core_uninit_device(&mvdev->core_device);
kfree(mvdev); kfree(mvdev);
return ret; return ret;
...@@ -629,9 +615,10 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev, ...@@ -629,9 +615,10 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev,
static void mlx5vf_pci_remove(struct pci_dev *pdev) static void mlx5vf_pci_remove(struct pci_dev *pdev)
{ {
struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev); struct mlx5vf_pci_core_device *mvdev = mlx5vf_drvdata(pdev);
vfio_pci_core_unregister_device(&mvdev->core_device); vfio_pci_core_unregister_device(&mvdev->core_device);
mlx5vf_cmd_remove_migratable(mvdev);
vfio_pci_core_uninit_device(&mvdev->core_device); vfio_pci_core_uninit_device(&mvdev->core_device);
kfree(mvdev); kfree(mvdev);
} }
...@@ -654,6 +641,7 @@ static struct pci_driver mlx5vf_pci_driver = { ...@@ -654,6 +641,7 @@ static struct pci_driver mlx5vf_pci_driver = {
.probe = mlx5vf_pci_probe, .probe = mlx5vf_pci_probe,
.remove = mlx5vf_pci_remove, .remove = mlx5vf_pci_remove,
.err_handler = &mlx5vf_err_handlers, .err_handler = &mlx5vf_err_handlers,
.driver_managed_dma = true,
}; };
static void __exit mlx5vf_pci_cleanup(void) static void __exit mlx5vf_pci_cleanup(void)
......
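For readers following the drvdata conversion above: probe now stores &mvdev->core_device as the PCI drvdata, and the reset and remove paths fetch it back through mlx5vf_drvdata(). That helper is not part of this hunk; a minimal sketch of what it plausibly looks like, assuming drvdata points at the embedded vfio_pci_core_device::

    static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
    {
            struct vfio_pci_core_device *core_device =
                    dev_get_drvdata(&pdev->dev);

            return container_of(core_device, struct mlx5vf_pci_core_device,
                                core_device);
    }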
...@@ -151,10 +151,10 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -151,10 +151,10 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENOMEM; return -ENOMEM;
vfio_pci_core_init_device(vdev, pdev, &vfio_pci_ops); vfio_pci_core_init_device(vdev, pdev, &vfio_pci_ops);
dev_set_drvdata(&pdev->dev, vdev);
ret = vfio_pci_core_register_device(vdev); ret = vfio_pci_core_register_device(vdev);
if (ret) if (ret)
goto out_free; goto out_free;
dev_set_drvdata(&pdev->dev, vdev);
return 0; return 0;
out_free: out_free:
...@@ -174,10 +174,12 @@ static void vfio_pci_remove(struct pci_dev *pdev) ...@@ -174,10 +174,12 @@ static void vfio_pci_remove(struct pci_dev *pdev)
static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
{ {
struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
if (!enable_sriov) if (!enable_sriov)
return -ENOENT; return -ENOENT;
return vfio_pci_core_sriov_configure(pdev, nr_virtfn); return vfio_pci_core_sriov_configure(vdev, nr_virtfn);
} }
static const struct pci_device_id vfio_pci_table[] = { static const struct pci_device_id vfio_pci_table[] = {
......
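The same drvdata pattern lets the SR-IOV path drop its device lookup: vfio_pci_core_sriov_configure() now takes the vfio_pci_core_device, as the vfio_pci_core.h change further below shows. A hedged sketch of how a variant driver could wire this up (the myvf_* name is hypothetical)::

    static int myvf_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
    {
            /* drvdata was set to the vfio_pci_core_device during probe */
            struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);

            return vfio_pci_core_sriov_configure(vdev, nr_virtfn);
    }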
...@@ -402,11 +402,14 @@ bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev) ...@@ -402,11 +402,14 @@ bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev)
u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]); u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
/* /*
* Memory region cannot be accessed if device power state is D3.
*
* SR-IOV VF memory enable is handled by the MSE bit in the * SR-IOV VF memory enable is handled by the MSE bit in the
* PF SR-IOV capability, there's therefore no need to trigger * PF SR-IOV capability, there's therefore no need to trigger
* faults based on the virtual value. * faults based on the virtual value.
*/ */
return pdev->no_command_memory || (cmd & PCI_COMMAND_MEMORY); return pdev->current_state < PCI_D3hot &&
(pdev->no_command_memory || (cmd & PCI_COMMAND_MEMORY));
} }
/* /*
...@@ -692,6 +695,22 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm) ...@@ -692,6 +695,22 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
return 0; return 0;
} }
/*
* It takes all the required locks to protect the access of power related
* variables and then invokes vfio_pci_set_power_state().
*/
static void vfio_lock_and_set_power_state(struct vfio_pci_core_device *vdev,
pci_power_t state)
{
if (state >= PCI_D3hot)
vfio_pci_zap_and_down_write_memory_lock(vdev);
else
down_write(&vdev->memory_lock);
vfio_pci_set_power_state(vdev, state);
up_write(&vdev->memory_lock);
}
static int vfio_pm_config_write(struct vfio_pci_core_device *vdev, int pos, static int vfio_pm_config_write(struct vfio_pci_core_device *vdev, int pos,
int count, struct perm_bits *perm, int count, struct perm_bits *perm,
int offset, __le32 val) int offset, __le32 val)
...@@ -718,7 +737,7 @@ static int vfio_pm_config_write(struct vfio_pci_core_device *vdev, int pos, ...@@ -718,7 +737,7 @@ static int vfio_pm_config_write(struct vfio_pci_core_device *vdev, int pos,
break; break;
} }
vfio_pci_set_power_state(vdev, state); vfio_lock_and_set_power_state(vdev, state);
} }
return count; return count;
...@@ -738,12 +757,29 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm) ...@@ -738,12 +757,29 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm)
*/ */
p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
/*
	 * The guests can't process PME events. If a PME event is generated,
	 * it will mostly be handled by the host, and the host will clear
	 * PME_STATUS. So virtualize the PME_Support bits.
* The vconfig bits will be cleared during device capability
* initialization.
*/
p_setw(perm, PCI_PM_PMC, PCI_PM_CAP_PME_MASK, NO_WRITE);
/* /*
* Power management is defined *per function*, so we can let * Power management is defined *per function*, so we can let
* the user change power state, but we trap and initiate the * the user change power state, but we trap and initiate the
* change ourselves, so the state bits are read-only. * change ourselves, so the state bits are read-only.
*
* The guest can't process PME from D3cold so virtualize PME_Status
* and PME_En bits. The vconfig bits will be cleared during device
* capability initialization.
*/ */
p_setd(perm, PCI_PM_CTRL, NO_VIRT, ~PCI_PM_CTRL_STATE_MASK); p_setd(perm, PCI_PM_CTRL,
PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_PME_STATUS,
~(PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_PME_STATUS |
PCI_PM_CTRL_STATE_MASK));
return 0; return 0;
} }
...@@ -1412,6 +1448,17 @@ static int vfio_ext_cap_len(struct vfio_pci_core_device *vdev, u16 ecap, u16 epo ...@@ -1412,6 +1448,17 @@ static int vfio_ext_cap_len(struct vfio_pci_core_device *vdev, u16 ecap, u16 epo
return 0; return 0;
} }
static void vfio_update_pm_vconfig_bytes(struct vfio_pci_core_device *vdev,
int offset)
{
__le16 *pmc = (__le16 *)&vdev->vconfig[offset + PCI_PM_PMC];
__le16 *ctrl = (__le16 *)&vdev->vconfig[offset + PCI_PM_CTRL];
/* Clear vconfig PME_Support, PME_Status, and PME_En bits */
*pmc &= ~cpu_to_le16(PCI_PM_CAP_PME_MASK);
*ctrl &= ~cpu_to_le16(PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_PME_STATUS);
}
static int vfio_fill_vconfig_bytes(struct vfio_pci_core_device *vdev, static int vfio_fill_vconfig_bytes(struct vfio_pci_core_device *vdev,
int offset, int size) int offset, int size)
{ {
...@@ -1535,6 +1582,9 @@ static int vfio_cap_init(struct vfio_pci_core_device *vdev) ...@@ -1535,6 +1582,9 @@ static int vfio_cap_init(struct vfio_pci_core_device *vdev)
if (ret) if (ret)
return ret; return ret;
if (cap == PCI_CAP_ID_PM)
vfio_update_pm_vconfig_bytes(vdev, pos);
prev = &vdev->vconfig[pos + PCI_CAP_LIST_NEXT]; prev = &vdev->vconfig[pos + PCI_CAP_LIST_NEXT];
pos = next; pos = next;
caps++; caps++;
......
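From the user's point of view a power-state change is still just a config write to PM_CTRL; vfio traps it, takes memory_lock (zapping mmaps for D3hot and deeper), and performs the transition itself, while the PME bits now read back as zero. A hedged userspace sketch requesting D3hot through the config region, assuming a little-endian host and that pm_cap_off is the Power Management capability offset the caller has already located::

    #include <linux/pci_regs.h>
    #include <linux/vfio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <stdint.h>

    static int set_d3hot(int device_fd, unsigned int pm_cap_off)
    {
            struct vfio_region_info info = {
                    .argsz = sizeof(info),
                    .index = VFIO_PCI_CONFIG_REGION_INDEX,
            };
            uint16_t ctrl;

            if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info))
                    return -1;

            /* Read-modify-write PM_CTRL: only the power-state field is
             * honored; PME_Status/PME_En are virtualized and read as 0. */
            if (pread(device_fd, &ctrl, sizeof(ctrl),
                      info.offset + pm_cap_off + PCI_PM_CTRL) != sizeof(ctrl))
                    return -1;

            ctrl = (ctrl & ~PCI_PM_CTRL_STATE_MASK) | 3;    /* 3 == D3hot */

            if (pwrite(device_fd, &ctrl, sizeof(ctrl),
                       info.offset + pm_cap_off + PCI_PM_CTRL) != sizeof(ctrl))
                    return -1;

            return 0;
    }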
...@@ -447,6 +447,11 @@ struct mlx5_qp_table { ...@@ -447,6 +447,11 @@ struct mlx5_qp_table {
struct radix_tree_root tree; struct radix_tree_root tree;
}; };
enum {
MLX5_PF_NOTIFY_DISABLE_VF,
MLX5_PF_NOTIFY_ENABLE_VF,
};
struct mlx5_vf_context { struct mlx5_vf_context {
int enabled; int enabled;
u64 port_guid; u64 port_guid;
...@@ -457,6 +462,7 @@ struct mlx5_vf_context { ...@@ -457,6 +462,7 @@ struct mlx5_vf_context {
u8 port_guid_valid:1; u8 port_guid_valid:1;
u8 node_guid_valid:1; u8 node_guid_valid:1;
enum port_state_policy policy; enum port_state_policy policy;
struct blocking_notifier_head notifier;
}; };
struct mlx5_core_sriov { struct mlx5_core_sriov {
...@@ -1162,6 +1168,12 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type ...@@ -1162,6 +1168,12 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type
struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev); struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev);
void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev); void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev);
int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev,
int vf_id,
struct notifier_block *nb);
void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev,
int vf_id,
struct notifier_block *nb);
#ifdef CONFIG_MLX5_CORE_IPOIB #ifdef CONFIG_MLX5_CORE_IPOIB
struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
struct ib_device *ibdev, struct ib_device *ibdev,
......
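The new notifier lets a VF driver learn when its PF is about to disable, or has re-enabled, SR-IOV, which the mlx5 vfio-pci variant driver uses to keep its migration setup in sync. A hedged sketch of a consumer, with hypothetical myvf_* names wrapped around the calls declared in the header above::

    static int myvf_pf_event(struct notifier_block *nb, unsigned long event,
                             void *data)
    {
            switch (event) {
            case MLX5_PF_NOTIFY_DISABLE_VF:
                    /* PF is tearing down VFs: stop relying on PF services */
                    break;
            case MLX5_PF_NOTIFY_ENABLE_VF:
                    /* VFs (re)enabled: PF services are available again */
                    break;
            }
            return NOTIFY_OK;
    }

    /* registration, e.g. at probe time for VF number vf_id: */
            nb->notifier_call = myvf_pf_event;
            err = mlx5_sriov_blocking_notifier_register(mdev, vf_id, nb);

    /* and on the teardown path: */
            mlx5_sriov_blocking_notifier_unregister(mdev, vf_id, nb);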
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include <linux/poll.h> #include <linux/poll.h>
#include <uapi/linux/vfio.h> #include <uapi/linux/vfio.h>
struct kvm;
/* /*
* VFIO devices can be placed in a set, this allows all devices to share this * VFIO devices can be placed in a set, this allows all devices to share this
* structure and the VFIO core will provide a lock that is held around * structure and the VFIO core will provide a lock that is held around
...@@ -34,6 +36,8 @@ struct vfio_device { ...@@ -34,6 +36,8 @@ struct vfio_device {
struct vfio_device_set *dev_set; struct vfio_device_set *dev_set;
struct list_head dev_set_list; struct list_head dev_set_list;
unsigned int migration_flags; unsigned int migration_flags;
/* Driver must reference the kvm during open_device or never touch it */
struct kvm *kvm;
/* Members below here are private, not for driver use */ /* Members below here are private, not for driver use */
refcount_t refcount; refcount_t refcount;
...@@ -125,8 +129,6 @@ void vfio_uninit_group_dev(struct vfio_device *device); ...@@ -125,8 +129,6 @@ void vfio_uninit_group_dev(struct vfio_device *device);
int vfio_register_group_dev(struct vfio_device *device); int vfio_register_group_dev(struct vfio_device *device);
int vfio_register_emulated_iommu_dev(struct vfio_device *device); int vfio_register_emulated_iommu_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device);
extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
extern void vfio_device_put(struct vfio_device *device);
int vfio_assign_device_set(struct vfio_device *device, void *set_id); int vfio_assign_device_set(struct vfio_device *device, void *set_id);
...@@ -138,56 +140,36 @@ int vfio_mig_get_next_state(struct vfio_device *device, ...@@ -138,56 +140,36 @@ int vfio_mig_get_next_state(struct vfio_device *device,
/* /*
* External user API * External user API
*/ */
extern struct vfio_group *vfio_group_get_external_user(struct file *filep); extern struct iommu_group *vfio_file_iommu_group(struct file *file);
extern void vfio_group_put_external_user(struct vfio_group *group); extern bool vfio_file_enforced_coherent(struct file *file);
extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device extern void vfio_file_set_kvm(struct file *file, struct kvm *kvm);
*dev); extern bool vfio_file_has_dev(struct file *file, struct vfio_device *device);
extern bool vfio_external_group_match_file(struct vfio_group *group,
struct file *filep);
extern int vfio_external_user_iommu_id(struct vfio_group *group);
extern long vfio_external_check_extension(struct vfio_group *group,
unsigned long arg);
#define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long))
extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, extern int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn,
int npage, int prot, unsigned long *phys_pfn); int npage, int prot, unsigned long *phys_pfn);
extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, extern int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
int npage); int npage);
extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
extern int vfio_group_pin_pages(struct vfio_group *group,
unsigned long *user_iova_pfn, int npage,
int prot, unsigned long *phys_pfn);
extern int vfio_group_unpin_pages(struct vfio_group *group,
unsigned long *user_iova_pfn, int npage);
extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
void *data, size_t len, bool write); void *data, size_t len, bool write);
extern struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group);
/* each type has independent events */ /* each type has independent events */
enum vfio_notify_type { enum vfio_notify_type {
VFIO_IOMMU_NOTIFY = 0, VFIO_IOMMU_NOTIFY = 0,
VFIO_GROUP_NOTIFY = 1,
}; };
/* events for VFIO_IOMMU_NOTIFY */ /* events for VFIO_IOMMU_NOTIFY */
#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) #define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
/* events for VFIO_GROUP_NOTIFY */ extern int vfio_register_notifier(struct vfio_device *device,
#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0)
extern int vfio_register_notifier(struct device *dev,
enum vfio_notify_type type, enum vfio_notify_type type,
unsigned long *required_events, unsigned long *required_events,
struct notifier_block *nb); struct notifier_block *nb);
extern int vfio_unregister_notifier(struct device *dev, extern int vfio_unregister_notifier(struct vfio_device *device,
enum vfio_notify_type type, enum vfio_notify_type type,
struct notifier_block *nb); struct notifier_block *nb);
struct kvm;
extern void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm);
/* /*
* Sub-module helpers * Sub-module helpers
......
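All of the remaining external-user entry points now key off either a struct file or the vfio_device itself, so an emulated-IOMMU (mdev-style) driver no longer juggles struct device or vfio_group handles. A hedged sketch of a caller of the reworked pin API; everything apart from the vfio_* calls (names, the IOMMU_READ prot choice) is an assumption::

    static int my_mdev_read_guest_page(struct vfio_device *vdev,
                                       unsigned long gfn, void *buf)
    {
            unsigned long phys_pfn;
            int ret;

            /* returns the number of pages pinned, or -errno */
            ret = vfio_pin_pages(vdev, &gfn, 1, IOMMU_READ, &phys_pfn);
            if (ret != 1)
                    return ret < 0 ? ret : -EFAULT;

            /* ... access the page behind phys_pfn, or use vfio_dma_rw()
             * to copy from the IOVA directly ... */

            vfio_unpin_pages(vdev, &gfn, 1);
            return 0;
    }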
...@@ -227,8 +227,9 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, ...@@ -227,8 +227,9 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev); int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev);
void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev); void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev);
void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev); void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev);
int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn);
extern const struct pci_error_handlers vfio_pci_core_err_handlers; extern const struct pci_error_handlers vfio_pci_core_err_handlers;
int vfio_pci_core_sriov_configure(struct vfio_pci_core_device *vdev,
int nr_virtfn);
long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
unsigned long arg); unsigned long arg);
int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
......
...@@ -643,7 +643,7 @@ enum { ...@@ -643,7 +643,7 @@ enum {
}; };
/** /**
* VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12,
* struct vfio_pci_hot_reset_info) * struct vfio_pci_hot_reset_info)
* *
* Return: 0 on success, -errno on failure: * Return: 0 on success, -errno on failure:
...@@ -770,7 +770,7 @@ struct vfio_device_ioeventfd { ...@@ -770,7 +770,7 @@ struct vfio_device_ioeventfd {
#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
/** /**
* VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, * VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
* struct vfio_device_feature) * struct vfio_device_feature)
* *
* Get, set, or probe feature data of the device. The feature is selected * Get, set, or probe feature data of the device. The feature is selected
......
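As the corrected comments say, both ioctls are _IOWR: the caller fills in the selector and reads the result back through the same buffer. A hedged userspace sketch querying the migration feature via VFIO_DEVICE_FEATURE, assuming the vfio_device_feature/vfio_device_feature_migration layouts from <linux/vfio.h> in this release::

    #include <linux/vfio.h>
    #include <sys/ioctl.h>
    #include <stdint.h>

    static int query_migration_flags(int device_fd, uint64_t *mig_flags)
    {
            uint8_t buf[sizeof(struct vfio_device_feature) +
                        sizeof(struct vfio_device_feature_migration)]
                    __attribute__((aligned(8))) = { 0 };
            struct vfio_device_feature *feat = (void *)buf;
            struct vfio_device_feature_migration *mig = (void *)feat->data;

            feat->argsz = sizeof(buf);
            feat->flags = VFIO_DEVICE_FEATURE_GET |
                          VFIO_DEVICE_FEATURE_MIGRATION;

            if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feat))
                    return -1;

            *mig_flags = mig->flags;  /* VFIO_MIGRATION_STOP_COPY, _P2P, ... */
            return 0;
    }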