Commit 6a7e448c authored by Brett Creeley, committed by Alex Williamson

vfio/pds: Refactor/simplify reset logic

The current logic for handling resets is more complicated than it needs
to be. The deferred_reset flag is used to indicate a reset is needed
and the deferred_reset_state is the requested post-reset state.

Also, the deferred_reset logic was added to vfio migration drivers to
prevent a circular locking dependency with respect to mm_lock and state
mutex. This is mainly because of the copy_to/from_user() functions (which
take mm_lock) that are invoked while the state mutex is held.

Remove all of the deferred reset logic and instead pass the requested
next state to pds_vfio_reset() so it can be used for both VMM- and
DSC-initiated resets.

This removes the need for pds_vfio_state_mutex_unlock(), so remove that
and replace its uses with a simple mutex_unlock().

Also, remove the reset_mutex as it's no longer needed since the
state_mutex can be the driver's primary protector.
Suggested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
Signed-off-by: Brett Creeley <brett.creeley@amd.com>
Link: https://lore.kernel.org/r/20240308182149.22036-3-brett.creeley@amd.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 457f7308
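
For reference, the calling convention after this change, pieced together from the hunks below (a sketch only, not a standalone compilation unit; pds_vfio is the driver's per-VF device struct declared in the header hunk):

        /* Callers hold state_mutex; pds_vfio_reset() simply applies the requested state. */
        mutex_lock(&pds_vfio->state_mutex);
        pds_vfio_reset(pds_vfio, VFIO_DEVICE_STATE_RUNNING);    /* VMM-initiated reset (reset_done) */
        mutex_unlock(&pds_vfio->state_mutex);

        /* DSC-initiated recovery already runs under state_mutex and requests ERROR. */
        pds_vfio_reset(pds_vfio, VFIO_DEVICE_STATE_ERROR);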
@@ -607,7 +607,7 @@ int pds_vfio_dma_logging_report(struct vfio_device *vdev, unsigned long iova,
         mutex_lock(&pds_vfio->state_mutex);
         err = pds_vfio_dirty_sync(pds_vfio, dirty, iova, length);
-        pds_vfio_state_mutex_unlock(pds_vfio);
+        mutex_unlock(&pds_vfio->state_mutex);
         return err;
 }
@@ -624,7 +624,7 @@ int pds_vfio_dma_logging_start(struct vfio_device *vdev,
         mutex_lock(&pds_vfio->state_mutex);
         pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS);
         err = pds_vfio_dirty_enable(pds_vfio, ranges, nnodes, page_size);
-        pds_vfio_state_mutex_unlock(pds_vfio);
+        mutex_unlock(&pds_vfio->state_mutex);
         return err;
 }
@@ -637,7 +637,7 @@ int pds_vfio_dma_logging_stop(struct vfio_device *vdev)
         mutex_lock(&pds_vfio->state_mutex);
         pds_vfio_dirty_disable(pds_vfio, true);
-        pds_vfio_state_mutex_unlock(pds_vfio);
+        mutex_unlock(&pds_vfio->state_mutex);
         return 0;
 }
@@ -21,16 +21,13 @@
 static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio)
 {
-        bool deferred_reset_needed = false;
         /*
          * Documentation states that the kernel migration driver must not
          * generate asynchronous device state transitions outside of
          * manipulation by the user or the VFIO_DEVICE_RESET ioctl.
          *
          * Since recovery is an asynchronous event received from the device,
-         * initiate a deferred reset. Issue a deferred reset in the following
-         * situations:
+         * initiate a reset in the following situations:
          * 1. Migration is in progress, which will cause the next step of
          *    the migration to fail.
          * 2. If the device is in a state that will be set to
@@ -42,24 +39,8 @@ static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio)
              pds_vfio->state != VFIO_DEVICE_STATE_ERROR) ||
             (pds_vfio->state == VFIO_DEVICE_STATE_RUNNING &&
              pds_vfio_dirty_is_enabled(pds_vfio)))
-                deferred_reset_needed = true;
+                pds_vfio_reset(pds_vfio, VFIO_DEVICE_STATE_ERROR);
         mutex_unlock(&pds_vfio->state_mutex);
-        /*
-         * On the next user initiated state transition, the device will
-         * transition to the VFIO_DEVICE_STATE_ERROR. At this point it's the user's
-         * responsibility to reset the device.
-         *
-         * If a VFIO_DEVICE_RESET is requested post recovery and before the next
-         * state transition, then the deferred reset state will be set to
-         * VFIO_DEVICE_STATE_RUNNING.
-         */
-        if (deferred_reset_needed) {
-                mutex_lock(&pds_vfio->reset_mutex);
-                pds_vfio->deferred_reset = true;
-                pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR;
-                mutex_unlock(&pds_vfio->reset_mutex);
-        }
 }
 static int pds_vfio_pci_notify_handler(struct notifier_block *nb,
@@ -185,7 +166,9 @@ static void pds_vfio_pci_aer_reset_done(struct pci_dev *pdev)
 {
         struct pds_vfio_pci_device *pds_vfio = pds_vfio_pci_drvdata(pdev);
-        pds_vfio_reset(pds_vfio);
+        mutex_lock(&pds_vfio->state_mutex);
+        pds_vfio_reset(pds_vfio, VFIO_DEVICE_STATE_RUNNING);
+        mutex_unlock(&pds_vfio->state_mutex);
 }
 static const struct pci_error_handlers pds_vfio_pci_err_handlers = {
@@ -26,37 +26,14 @@ struct pds_vfio_pci_device *pds_vfio_pci_drvdata(struct pci_dev *pdev)
                             vfio_coredev);
 }
-void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio)
+void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio,
+                    enum vfio_device_mig_state state)
 {
-again:
-        mutex_lock(&pds_vfio->reset_mutex);
-        if (pds_vfio->deferred_reset) {
-                pds_vfio->deferred_reset = false;
-                pds_vfio_put_restore_file(pds_vfio);
-                pds_vfio_put_save_file(pds_vfio);
-                if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) {
-                        pds_vfio_dirty_disable(pds_vfio, false);
-                }
-                pds_vfio->state = pds_vfio->deferred_reset_state;
-                pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
-                mutex_unlock(&pds_vfio->reset_mutex);
-                goto again;
-        }
-        mutex_unlock(&pds_vfio->state_mutex);
-        mutex_unlock(&pds_vfio->reset_mutex);
-}
-void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio)
-{
-        mutex_lock(&pds_vfio->reset_mutex);
-        pds_vfio->deferred_reset = true;
-        pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
-        if (!mutex_trylock(&pds_vfio->state_mutex)) {
-                mutex_unlock(&pds_vfio->reset_mutex);
-                return;
-        }
-        mutex_unlock(&pds_vfio->reset_mutex);
-        pds_vfio_state_mutex_unlock(pds_vfio);
+        pds_vfio_put_restore_file(pds_vfio);
+        pds_vfio_put_save_file(pds_vfio);
+        if (state == VFIO_DEVICE_STATE_ERROR)
+                pds_vfio_dirty_disable(pds_vfio, false);
+        pds_vfio->state = state;
 }
 static struct file *
@@ -97,8 +74,7 @@ pds_vfio_set_device_state(struct vfio_device *vdev,
                         break;
                 }
         }
-        pds_vfio_state_mutex_unlock(pds_vfio);
-        /* still waiting on a deferred_reset */
+        mutex_unlock(&pds_vfio->state_mutex);
         if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR)
                 res = ERR_PTR(-EIO);
@@ -114,7 +90,7 @@ static int pds_vfio_get_device_state(struct vfio_device *vdev,
         mutex_lock(&pds_vfio->state_mutex);
         *current_state = pds_vfio->state;
-        pds_vfio_state_mutex_unlock(pds_vfio);
+        mutex_unlock(&pds_vfio->state_mutex);
         return 0;
 }
@@ -156,7 +132,6 @@ static int pds_vfio_init_device(struct vfio_device *vdev)
         pds_vfio->vf_id = vf_id;
         mutex_init(&pds_vfio->state_mutex);
-        mutex_init(&pds_vfio->reset_mutex);
         vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P;
         vdev->mig_ops = &pds_vfio_lm_ops;
@@ -178,7 +153,6 @@ static void pds_vfio_release_device(struct vfio_device *vdev)
                             vfio_coredev.vdev);
         mutex_destroy(&pds_vfio->state_mutex);
-        mutex_destroy(&pds_vfio->reset_mutex);
         vfio_pci_core_release_dev(vdev);
 }
@@ -194,7 +168,6 @@ static int pds_vfio_open_device(struct vfio_device *vdev)
                 return err;
         pds_vfio->state = VFIO_DEVICE_STATE_RUNNING;
-        pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
         vfio_pci_core_finish_enable(&pds_vfio->vfio_coredev);
@@ -18,20 +18,16 @@ struct pds_vfio_pci_device {
         struct pds_vfio_dirty dirty;
         struct mutex state_mutex; /* protect migration state */
         enum vfio_device_mig_state state;
-        struct mutex reset_mutex; /* protect reset_done flow */
-        u8 deferred_reset;
-        enum vfio_device_mig_state deferred_reset_state;
         struct notifier_block nb;
         int vf_id;
         u16 client_id;
 };
-void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio);
 const struct vfio_device_ops *pds_vfio_ops_info(void);
 struct pds_vfio_pci_device *pds_vfio_pci_drvdata(struct pci_dev *pdev);
-void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio);
+void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio,
+                    enum vfio_device_mig_state state);
 struct pci_dev *pds_vfio_to_pci_dev(struct pds_vfio_pci_device *pds_vfio);
 struct device *pds_vfio_to_dev(struct pds_vfio_pci_device *pds_vfio);