Commit deefd502 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'vfio-v6.7-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Add support for "chunk mode" in the mlx5-vfio-pci variant driver,
   which allows both larger device image sizes for migration, beyond the
   previous 4GB limit, and also read-ahead support for improved
   migration performance (Yishai Hadas)

 - A new bus master control interface for the CDX bus driver where there
   is no in-band mechanism to toggle device DMA as there is through
   config space on PCI devices (Nipun Gupta)

 - Add explicit alignment directives to vfio data structures to reduce
   the chance of breaking 32-bit userspace. In most cases this is
   transparent and the remaining cases where data structures are padded
   work within the existing rules for extending data structures within
   vfio (Stefan Hajnoczi)

 - Resolve a bug in the cdx bus driver noted when compiled with clang
   where missing parentheses result in the wrong operation (Nathan
   Chancellor)

 - Resolve errors reported by smatch for a function when dealing with
   invalid inputs (Alex Williamson)

 - Add migration support to the mtty vfio/mdev sample driver for testing
   and integration purposes, allowing CI of migration without specific
   hardware requirements. Also resolve many of the shortcomings of
   this driver relative to implementation of the vfio interrupt ioctl
   along the way (Alex Williamson)

* tag 'vfio-v6.7-rc1' of https://github.com/awilliam/linux-vfio:
  vfio/mtty: Enable migration support
  vfio/mtty: Overhaul mtty interrupt handling
  vfio: Fix smatch errors in vfio_combine_iova_ranges()
  vfio/cdx: Add parentheses between bitwise AND expression and logical NOT
  vfio/mlx5: Activate the chunk mode functionality
  vfio/mlx5: Add support for READING in chunk mode
  vfio/mlx5: Add support for SAVING in chunk mode
  vfio/mlx5: Pre-allocate chunks for the STOP_COPY phase
  vfio/mlx5: Rename some stuff to match chunk mode
  vfio/mlx5: Enable querying state size which is > 4GB
  vfio/mlx5: Refactor the SAVE callback to activate a work only upon an error
  vfio/mlx5: Wake up the reader post of disabling the SAVING migration file
  vfio: use __aligned_u64 in struct vfio_device_ioeventfd
  vfio: use __aligned_u64 in struct vfio_device_gfx_plane_info
  vfio: trivially use __aligned_u64 for ioctl structs
  vfio-cdx: add bus mastering device feature support
  vfio: add bus master feature to device feature ioctl
  cdx: add support for bus mastering
parents 009fbfc9 2b88119e
...@@ -182,6 +182,38 @@ cdx_match_id(const struct cdx_device_id *ids, struct cdx_device *dev) ...@@ -182,6 +182,38 @@ cdx_match_id(const struct cdx_device_id *ids, struct cdx_device *dev)
return NULL; return NULL;
} }
int cdx_set_master(struct cdx_device *cdx_dev)
{
struct cdx_controller *cdx = cdx_dev->cdx;
struct cdx_device_config dev_config;
int ret = -EOPNOTSUPP;
dev_config.type = CDX_DEV_BUS_MASTER_CONF;
dev_config.bus_master_enable = true;
if (cdx->ops->dev_configure)
ret = cdx->ops->dev_configure(cdx, cdx_dev->bus_num,
cdx_dev->dev_num, &dev_config);
return ret;
}
EXPORT_SYMBOL_GPL(cdx_set_master);
int cdx_clear_master(struct cdx_device *cdx_dev)
{
struct cdx_controller *cdx = cdx_dev->cdx;
struct cdx_device_config dev_config;
int ret = -EOPNOTSUPP;
dev_config.type = CDX_DEV_BUS_MASTER_CONF;
dev_config.bus_master_enable = false;
if (cdx->ops->dev_configure)
ret = cdx->ops->dev_configure(cdx, cdx_dev->bus_num,
cdx_dev->dev_num, &dev_config);
return ret;
}
EXPORT_SYMBOL_GPL(cdx_clear_master);
/** /**
* cdx_bus_match - device to driver matching callback * cdx_bus_match - device to driver matching callback
* @dev: the cdx device to match against * @dev: the cdx device to match against
......
...@@ -56,6 +56,10 @@ static int cdx_configure_device(struct cdx_controller *cdx, ...@@ -56,6 +56,10 @@ static int cdx_configure_device(struct cdx_controller *cdx,
case CDX_DEV_RESET_CONF: case CDX_DEV_RESET_CONF:
ret = cdx_mcdi_reset_device(cdx->priv, bus_num, dev_num); ret = cdx_mcdi_reset_device(cdx->priv, bus_num, dev_num);
break; break;
case CDX_DEV_BUS_MASTER_CONF:
ret = cdx_mcdi_bus_master_enable(cdx->priv, bus_num, dev_num,
dev_config->bus_master_enable);
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
} }
......
...@@ -137,3 +137,61 @@ int cdx_mcdi_reset_device(struct cdx_mcdi *cdx, u8 bus_num, u8 dev_num) ...@@ -137,3 +137,61 @@ int cdx_mcdi_reset_device(struct cdx_mcdi *cdx, u8 bus_num, u8 dev_num)
return ret; return ret;
} }
/*
 * Read the control flags of the device addressed by bus_num:dev_num using
 * the MC_CMD_CDX_DEVICE_CONTROL_GET MCDI command.
 *
 * On success the flags dword from the response is stored in *flags and 0
 * is returned. A failing cdx_mcdi_rpc() call has its error code returned
 * unchanged; a response whose length differs from
 * MC_CMD_CDX_DEVICE_CONTROL_GET_OUT_LEN yields -EIO. *flags is left
 * untouched on every error path.
 */
static int cdx_mcdi_ctrl_flag_get(struct cdx_mcdi *cdx, u8 bus_num,
				  u8 dev_num, u32 *flags)
{
	MCDI_DECLARE_BUF(inbuf, MC_CMD_CDX_DEVICE_CONTROL_GET_IN_LEN);
	MCDI_DECLARE_BUF(outbuf, MC_CMD_CDX_DEVICE_CONTROL_GET_OUT_LEN);
	size_t resp_len;
	int rc;

	/* Address the target device in the request. */
	MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_GET_IN_BUS, bus_num);
	MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_GET_IN_DEVICE, dev_num);

	rc = cdx_mcdi_rpc(cdx, MC_CMD_CDX_DEVICE_CONTROL_GET, inbuf,
			  sizeof(inbuf), outbuf, sizeof(outbuf), &resp_len);
	if (rc)
		return rc;

	/* Only a response of exactly the expected size is trusted. */
	if (resp_len == MC_CMD_CDX_DEVICE_CONTROL_GET_OUT_LEN) {
		*flags = MCDI_DWORD(outbuf, CDX_DEVICE_CONTROL_GET_OUT_FLAGS);
		return 0;
	}

	return -EIO;
}
/*
 * Set or clear one bit in the control flags of the device addressed by
 * bus_num:dev_num.
 *
 * The current flags are read with cdx_mcdi_ctrl_flag_get(), the bit at
 * @bit_pos is set when @enable is true and cleared otherwise, and the
 * result is written back with the MC_CMD_CDX_DEVICE_CONTROL_SET MCDI
 * command. All other flag bits are written back as they were read.
 *
 * Returns 0 on success, -EINVAL when @bit_pos does not address a bit of
 * the 32-bit flags word, or the error from the failing get/set step.
 */
static int cdx_mcdi_ctrl_flag_set(struct cdx_mcdi *cdx, u8 bus_num,
				  u8 dev_num, bool enable, int bit_pos)
{
	MCDI_DECLARE_BUF(inbuf, MC_CMD_CDX_DEVICE_CONTROL_SET_IN_LEN);
	u32 flags;
	u32 mask;
	int ret;

	/* Shifting by a negative or too-large count is undefined. */
	if (bit_pos < 0 || (size_t)bit_pos >= sizeof(flags) * 8)
		return -EINVAL;

	/*
	 * Build the mask once with BIT() so that the set and clear paths
	 * agree and bit 31 is not produced by shifting a signed int.
	 */
	mask = BIT(bit_pos);

	/*
	 * Get flags and then set/reset bit at bit_pos according to
	 * the input params.
	 */
	ret = cdx_mcdi_ctrl_flag_get(cdx, bus_num, dev_num, &flags);
	if (ret)
		return ret;

	if (enable)
		flags |= mask;
	else
		flags &= ~mask;

	MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_SET_IN_BUS, bus_num);
	MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_SET_IN_DEVICE, dev_num);
	MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_SET_IN_FLAGS, flags);

	/* The SET command carries no response payload. */
	return cdx_mcdi_rpc(cdx, MC_CMD_CDX_DEVICE_CONTROL_SET, inbuf,
			    sizeof(inbuf), NULL, 0, NULL);
}
/*
 * Enable or disable bus mastering for the device at bus_num:dev_num by
 * updating the bus-master-enable bit of its control flags through
 * cdx_mcdi_ctrl_flag_set(), whose result is returned unchanged.
 */
int cdx_mcdi_bus_master_enable(struct cdx_mcdi *cdx, u8 bus_num,
			       u8 dev_num, bool enable)
{
	const int bme_bit =
		MC_CMD_CDX_DEVICE_CONTROL_SET_IN_BUS_MASTER_ENABLE_LBN;

	return cdx_mcdi_ctrl_flag_set(cdx, bus_num, dev_num, enable, bme_bit);
}
...@@ -58,4 +58,17 @@ int cdx_mcdi_get_dev_config(struct cdx_mcdi *cdx, ...@@ -58,4 +58,17 @@ int cdx_mcdi_get_dev_config(struct cdx_mcdi *cdx,
int cdx_mcdi_reset_device(struct cdx_mcdi *cdx, int cdx_mcdi_reset_device(struct cdx_mcdi *cdx,
u8 bus_num, u8 dev_num); u8 bus_num, u8 dev_num);
/**
* cdx_mcdi_bus_master_enable - Set/Reset bus mastering for cdx device
* represented by bus_num:dev_num
* @cdx: pointer to MCDI interface.
* @bus_num: Bus number.
* @dev_num: Device number.
* @enable: Enable bus mastering if set, disable otherwise.
*
* Return: 0 on success, <0 on failure
*/
int cdx_mcdi_bus_master_enable(struct cdx_mcdi *cdx, u8 bus_num,
u8 dev_num, bool enable);
#endif /* CDX_MCDI_FUNCTIONS_H */ #endif /* CDX_MCDI_FUNCTIONS_H */
...@@ -1379,7 +1379,7 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd, ...@@ -1379,7 +1379,7 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
intel_gvt_reset_vgpu(vgpu); intel_gvt_reset_vgpu(vgpu);
return 0; return 0;
} else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) { } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
struct vfio_device_gfx_plane_info dmabuf; struct vfio_device_gfx_plane_info dmabuf = {};
int ret = 0; int ret = 0;
minsz = offsetofend(struct vfio_device_gfx_plane_info, minsz = offsetofend(struct vfio_device_gfx_plane_info,
......
...@@ -14,7 +14,7 @@ static int vfio_cdx_open_device(struct vfio_device *core_vdev) ...@@ -14,7 +14,7 @@ static int vfio_cdx_open_device(struct vfio_device *core_vdev)
container_of(core_vdev, struct vfio_cdx_device, vdev); container_of(core_vdev, struct vfio_cdx_device, vdev);
struct cdx_device *cdx_dev = to_cdx_device(core_vdev->dev); struct cdx_device *cdx_dev = to_cdx_device(core_vdev->dev);
int count = cdx_dev->res_count; int count = cdx_dev->res_count;
int i; int i, ret;
vdev->regions = kcalloc(count, sizeof(struct vfio_cdx_region), vdev->regions = kcalloc(count, sizeof(struct vfio_cdx_region),
GFP_KERNEL_ACCOUNT); GFP_KERNEL_ACCOUNT);
...@@ -39,6 +39,17 @@ static int vfio_cdx_open_device(struct vfio_device *core_vdev) ...@@ -39,6 +39,17 @@ static int vfio_cdx_open_device(struct vfio_device *core_vdev)
if (!(cdx_dev->res[i].flags & IORESOURCE_READONLY)) if (!(cdx_dev->res[i].flags & IORESOURCE_READONLY))
vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE; vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
} }
ret = cdx_dev_reset(core_vdev->dev);
if (ret) {
kfree(vdev->regions);
vdev->regions = NULL;
return ret;
}
ret = cdx_clear_master(cdx_dev);
if (ret)
vdev->flags &= ~BME_SUPPORT;
else
vdev->flags |= BME_SUPPORT;
return 0; return 0;
} }
...@@ -52,6 +63,49 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev) ...@@ -52,6 +63,49 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev)
cdx_dev_reset(core_vdev->dev); cdx_dev_reset(core_vdev->dev);
} }
/*
 * Handle the bus master device feature for a vfio-cdx device.
 *
 * Returns -ENOTTY when BME_SUPPORT is not set in the device flags. The
 * request is then validated with vfio_check_feature() for a SET operation;
 * any result other than 1 is returned to the caller as is. The leading
 * part of the user structure, up to and including the op field, is copied
 * in (-EFAULT on failure) and op selects between cdx_set_master() and
 * cdx_clear_master(). Any other op value yields -EINVAL.
 */
static int vfio_cdx_bm_ctrl(struct vfio_device *core_vdev, u32 flags,
			    void __user *arg, size_t argsz)
{
	struct vfio_cdx_device *vdev =
		container_of(core_vdev, struct vfio_cdx_device, vdev);
	struct vfio_device_feature_bus_master ops;
	struct cdx_device *cdx_dev;
	size_t minsz;
	int ret;

	if (!(vdev->flags & BME_SUPPORT))
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET,
				 sizeof(ops));
	if (ret != 1)
		return ret;

	/* Only the bytes up to the end of the op field are read. */
	minsz = offsetofend(struct vfio_device_feature_bus_master, op);
	if (copy_from_user(&ops, arg, minsz))
		return -EFAULT;

	cdx_dev = to_cdx_device(core_vdev->dev);

	if (ops.op == VFIO_DEVICE_FEATURE_CLEAR_MASTER)
		return cdx_clear_master(cdx_dev);

	if (ops.op == VFIO_DEVICE_FEATURE_SET_MASTER)
		return cdx_set_master(cdx_dev);

	return -EINVAL;
}
/*
 * Dispatch a device feature request for a vfio-cdx device.
 *
 * The feature number is taken from @flags using VFIO_DEVICE_FEATURE_MASK.
 * VFIO_DEVICE_FEATURE_BUS_MASTER is forwarded to vfio_cdx_bm_ctrl(); every
 * other feature returns -ENOTTY.
 */
static int vfio_cdx_ioctl_feature(struct vfio_device *device, u32 flags,
				  void __user *arg, size_t argsz)
{
	u32 feature = flags & VFIO_DEVICE_FEATURE_MASK;

	if (feature == VFIO_DEVICE_FEATURE_BUS_MASTER)
		return vfio_cdx_bm_ctrl(device, flags, arg, argsz);

	return -ENOTTY;
}
static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev, static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev,
struct vfio_device_info __user *arg) struct vfio_device_info __user *arg)
{ {
...@@ -169,6 +223,7 @@ static const struct vfio_device_ops vfio_cdx_ops = { ...@@ -169,6 +223,7 @@ static const struct vfio_device_ops vfio_cdx_ops = {
.open_device = vfio_cdx_open_device, .open_device = vfio_cdx_open_device,
.close_device = vfio_cdx_close_device, .close_device = vfio_cdx_close_device,
.ioctl = vfio_cdx_ioctl, .ioctl = vfio_cdx_ioctl,
.device_feature = vfio_cdx_ioctl_feature,
.mmap = vfio_cdx_mmap, .mmap = vfio_cdx_mmap,
.bind_iommufd = vfio_iommufd_physical_bind, .bind_iommufd = vfio_iommufd_physical_bind,
.unbind_iommufd = vfio_iommufd_physical_unbind, .unbind_iommufd = vfio_iommufd_physical_unbind,
......
...@@ -23,6 +23,8 @@ struct vfio_cdx_region { ...@@ -23,6 +23,8 @@ struct vfio_cdx_region {
struct vfio_cdx_device { struct vfio_cdx_device {
struct vfio_device vdev; struct vfio_device vdev;
struct vfio_cdx_region *regions; struct vfio_cdx_region *regions;
u32 flags;
#define BME_SUPPORT BIT(0)
}; };
#endif /* VFIO_CDX_PRIVATE_H */ #endif /* VFIO_CDX_PRIVATE_H */
...@@ -86,7 +86,8 @@ int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) ...@@ -86,7 +86,8 @@ int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
} }
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size, u8 query_flags) size_t *state_size, u64 *total_size,
u8 query_flags)
{ {
u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {}; u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {}; u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
...@@ -128,6 +129,7 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -128,6 +129,7 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0); MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
MLX5_SET(query_vhca_migration_state_in, in, incremental, MLX5_SET(query_vhca_migration_state_in, in, incremental,
query_flags & MLX5VF_QUERY_INC); query_flags & MLX5VF_QUERY_INC);
MLX5_SET(query_vhca_migration_state_in, in, chunk, mvdev->chunk_mode);
ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in, ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
out); out);
...@@ -139,6 +141,11 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -139,6 +141,11 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
*state_size = MLX5_GET(query_vhca_migration_state_out, out, *state_size = MLX5_GET(query_vhca_migration_state_out, out,
required_umem_size); required_umem_size);
if (total_size)
*total_size = mvdev->chunk_mode ?
MLX5_GET64(query_vhca_migration_state_out, out,
remaining_total_size) : *state_size;
return 0; return 0;
} }
...@@ -254,6 +261,9 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, ...@@ -254,6 +261,9 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
mvdev->core_device.vdev.migration_flags |= mvdev->core_device.vdev.migration_flags |=
VFIO_MIGRATION_PRE_COPY; VFIO_MIGRATION_PRE_COPY;
if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
mvdev->chunk_mode = 1;
end: end:
mlx5_vf_put_core_dev(mvdev->mdev); mlx5_vf_put_core_dev(mvdev->mdev);
} }
...@@ -428,6 +438,7 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, ...@@ -428,6 +438,7 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf) void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
{ {
spin_lock_irq(&buf->migf->list_lock); spin_lock_irq(&buf->migf->list_lock);
buf->stop_copy_chunk_num = 0;
list_add_tail(&buf->buf_elm, &buf->migf->avail_list); list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
spin_unlock_irq(&buf->migf->list_lock); spin_unlock_irq(&buf->migf->list_lock);
} }
...@@ -475,6 +486,15 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, ...@@ -475,6 +486,15 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
return buf; return buf;
} }
/*
 * Common tail of a finished SAVE command: release the command output
 * buffer, signal migf->save_comp and drop a reference on the migration
 * file.
 *
 * NOTE(review): the fput() presumably pairs with the get_file() taken in
 * mlx5vf_cmd_save_vhca_state() before the command is issued - confirm.
 */
static void
mlx5vf_save_callback_complete(struct mlx5_vf_migration_file *migf,
struct mlx5vf_async_data *async_data)
{
/* async_data->out is the kvzalloc()'ed command output buffer */
kvfree(async_data->out);
complete(&migf->save_comp);
/* Drop the file reference last, after the last use of migf above */
fput(migf->filp);
}
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
{ {
struct mlx5vf_async_data *async_data = container_of(_work, struct mlx5vf_async_data *async_data = container_of(_work,
...@@ -487,16 +507,15 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) ...@@ -487,16 +507,15 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
mlx5vf_put_data_buffer(async_data->buf); mlx5vf_put_data_buffer(async_data->buf);
if (async_data->header_buf) if (async_data->header_buf)
mlx5vf_put_data_buffer(async_data->header_buf); mlx5vf_put_data_buffer(async_data->header_buf);
if (async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR) if (!async_data->stop_copy_chunk &&
async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR)
migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR; migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR;
else else
migf->state = MLX5_MIGF_STATE_ERROR; migf->state = MLX5_MIGF_STATE_ERROR;
wake_up_interruptible(&migf->poll_wait); wake_up_interruptible(&migf->poll_wait);
} }
mutex_unlock(&migf->lock); mutex_unlock(&migf->lock);
kvfree(async_data->out); mlx5vf_save_callback_complete(migf, async_data);
complete(&migf->save_comp);
fput(migf->filp);
} }
static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf, static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
...@@ -536,13 +555,20 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) ...@@ -536,13 +555,20 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
struct mlx5_vf_migration_file, async_data); struct mlx5_vf_migration_file, async_data);
if (!status) { if (!status) {
size_t next_required_umem_size = 0;
bool stop_copy_last_chunk;
size_t image_size; size_t image_size;
unsigned long flags; unsigned long flags;
bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY && bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
!async_data->last_chunk; !async_data->stop_copy_chunk;
image_size = MLX5_GET(save_vhca_state_out, async_data->out, image_size = MLX5_GET(save_vhca_state_out, async_data->out,
actual_image_size); actual_image_size);
if (async_data->buf->stop_copy_chunk_num)
next_required_umem_size = MLX5_GET(save_vhca_state_out,
async_data->out, next_required_umem_size);
stop_copy_last_chunk = async_data->stop_copy_chunk &&
!next_required_umem_size;
if (async_data->header_buf) { if (async_data->header_buf) {
status = add_buf_header(async_data->header_buf, image_size, status = add_buf_header(async_data->header_buf, image_size,
initial_pre_copy); initial_pre_copy);
...@@ -554,19 +580,34 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) ...@@ -554,19 +580,34 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
migf->max_pos += async_data->buf->length; migf->max_pos += async_data->buf->length;
spin_lock_irqsave(&migf->list_lock, flags); spin_lock_irqsave(&migf->list_lock, flags);
list_add_tail(&async_data->buf->buf_elm, &migf->buf_list); list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
if (async_data->buf->stop_copy_chunk_num) {
migf->num_ready_chunks++;
if (next_required_umem_size &&
migf->num_ready_chunks >= MAX_NUM_CHUNKS) {
/* Delay the next SAVE till one chunk be consumed */
migf->next_required_umem_size = next_required_umem_size;
next_required_umem_size = 0;
}
}
spin_unlock_irqrestore(&migf->list_lock, flags); spin_unlock_irqrestore(&migf->list_lock, flags);
if (initial_pre_copy) if (initial_pre_copy) {
migf->pre_copy_initial_bytes += image_size; migf->pre_copy_initial_bytes += image_size;
migf->state = async_data->last_chunk ? migf->state = MLX5_MIGF_STATE_PRE_COPY;
MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY; }
if (stop_copy_last_chunk)
migf->state = MLX5_MIGF_STATE_COMPLETE;
wake_up_interruptible(&migf->poll_wait); wake_up_interruptible(&migf->poll_wait);
if (next_required_umem_size)
mlx5vf_mig_file_set_save_work(migf,
/* Picking up the next chunk num */
(async_data->buf->stop_copy_chunk_num % MAX_NUM_CHUNKS) + 1,
next_required_umem_size);
mlx5vf_save_callback_complete(migf, async_data);
return;
} }
err: err:
/* /* The error flow can't run from an interrupt context */
* The error and the cleanup flows can't run from an
* interrupt context
*/
if (status == -EREMOTEIO) if (status == -EREMOTEIO)
status = MLX5_GET(save_vhca_state_out, async_data->out, status); status = MLX5_GET(save_vhca_state_out, async_data->out, status);
async_data->status = status; async_data->status = status;
...@@ -610,7 +651,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -610,7 +651,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
async_data = &migf->async_data; async_data = &migf->async_data;
async_data->buf = buf; async_data->buf = buf;
async_data->last_chunk = !track; async_data->stop_copy_chunk = !track;
async_data->out = kvzalloc(out_size, GFP_KERNEL); async_data->out = kvzalloc(out_size, GFP_KERNEL);
if (!async_data->out) { if (!async_data->out) {
err = -ENOMEM; err = -ENOMEM;
...@@ -618,10 +659,15 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -618,10 +659,15 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
} }
if (MLX5VF_PRE_COPY_SUPP(mvdev)) { if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
if (async_data->last_chunk && migf->buf_header) { if (async_data->stop_copy_chunk) {
header_buf = migf->buf_header; u8 header_idx = buf->stop_copy_chunk_num ?
migf->buf_header = NULL; buf->stop_copy_chunk_num - 1 : 0;
} else {
header_buf = migf->buf_header[header_idx];
migf->buf_header[header_idx] = NULL;
}
if (!header_buf) {
header_buf = mlx5vf_get_data_buffer(migf, header_buf = mlx5vf_get_data_buffer(migf,
sizeof(struct mlx5_vf_migration_header), DMA_NONE); sizeof(struct mlx5_vf_migration_header), DMA_NONE);
if (IS_ERR(header_buf)) { if (IS_ERR(header_buf)) {
...@@ -631,8 +677,8 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -631,8 +677,8 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
} }
} }
if (async_data->last_chunk) if (async_data->stop_copy_chunk)
migf->state = MLX5_MIGF_STATE_SAVE_LAST; migf->state = MLX5_MIGF_STATE_SAVE_STOP_COPY_CHUNK;
async_data->header_buf = header_buf; async_data->header_buf = header_buf;
get_file(migf->filp); get_file(migf->filp);
...@@ -707,18 +753,21 @@ void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf) ...@@ -707,18 +753,21 @@ void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf)
void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf) void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
{ {
struct mlx5_vhca_data_buffer *entry; struct mlx5_vhca_data_buffer *entry;
int i;
lockdep_assert_held(&migf->mvdev->state_mutex); lockdep_assert_held(&migf->mvdev->state_mutex);
WARN_ON(migf->mvdev->mdev_detach); WARN_ON(migf->mvdev->mdev_detach);
if (migf->buf) { for (i = 0; i < MAX_NUM_CHUNKS; i++) {
mlx5vf_free_data_buffer(migf->buf); if (migf->buf[i]) {
migf->buf = NULL; mlx5vf_free_data_buffer(migf->buf[i]);
} migf->buf[i] = NULL;
}
if (migf->buf_header) { if (migf->buf_header[i]) {
mlx5vf_free_data_buffer(migf->buf_header); mlx5vf_free_data_buffer(migf->buf_header[i]);
migf->buf_header = NULL; migf->buf_header[i] = NULL;
}
} }
list_splice(&migf->avail_list, &migf->buf_list); list_splice(&migf->avail_list, &migf->buf_list);
......
...@@ -20,7 +20,7 @@ enum mlx5_vf_migf_state { ...@@ -20,7 +20,7 @@ enum mlx5_vf_migf_state {
MLX5_MIGF_STATE_ERROR = 1, MLX5_MIGF_STATE_ERROR = 1,
MLX5_MIGF_STATE_PRE_COPY_ERROR, MLX5_MIGF_STATE_PRE_COPY_ERROR,
MLX5_MIGF_STATE_PRE_COPY, MLX5_MIGF_STATE_PRE_COPY,
MLX5_MIGF_STATE_SAVE_LAST, MLX5_MIGF_STATE_SAVE_STOP_COPY_CHUNK,
MLX5_MIGF_STATE_COMPLETE, MLX5_MIGF_STATE_COMPLETE,
}; };
...@@ -64,6 +64,7 @@ struct mlx5_vhca_data_buffer { ...@@ -64,6 +64,7 @@ struct mlx5_vhca_data_buffer {
u32 mkey; u32 mkey;
enum dma_data_direction dma_dir; enum dma_data_direction dma_dir;
u8 dmaed:1; u8 dmaed:1;
u8 stop_copy_chunk_num;
struct list_head buf_elm; struct list_head buf_elm;
struct mlx5_vf_migration_file *migf; struct mlx5_vf_migration_file *migf;
/* Optimize mlx5vf_get_migration_page() for sequential access */ /* Optimize mlx5vf_get_migration_page() for sequential access */
...@@ -78,10 +79,19 @@ struct mlx5vf_async_data { ...@@ -78,10 +79,19 @@ struct mlx5vf_async_data {
struct mlx5_vhca_data_buffer *buf; struct mlx5_vhca_data_buffer *buf;
struct mlx5_vhca_data_buffer *header_buf; struct mlx5_vhca_data_buffer *header_buf;
int status; int status;
u8 last_chunk:1; u8 stop_copy_chunk:1;
void *out; void *out;
}; };
/*
 * Per-chunk context for a deferred SAVE work item; struct
 * mlx5_vf_migration_file embeds MAX_NUM_CHUNKS of these as save_data[].
 *
 * NOTE(review): presumably filled in by mlx5vf_mig_file_set_save_work(),
 * which takes a chunk number and a next_required_umem_size - the code that
 * consumes these fields is not visible here, confirm there.
 */
struct mlx5vf_save_work_data {
/* Migration file this work item belongs to */
struct mlx5_vf_migration_file *migf;
/* Size reported as required for the next chunk - TODO confirm units */
size_t next_required_umem_size;
struct work_struct work;
/* Chunk number this work item handles - TODO confirm whether 1-based */
u8 chunk_num;
};
#define MAX_NUM_CHUNKS 2
struct mlx5_vf_migration_file { struct mlx5_vf_migration_file {
struct file *filp; struct file *filp;
struct mutex lock; struct mutex lock;
...@@ -94,8 +104,12 @@ struct mlx5_vf_migration_file { ...@@ -94,8 +104,12 @@ struct mlx5_vf_migration_file {
u32 record_tag; u32 record_tag;
u64 stop_copy_prep_size; u64 stop_copy_prep_size;
u64 pre_copy_initial_bytes; u64 pre_copy_initial_bytes;
struct mlx5_vhca_data_buffer *buf; size_t next_required_umem_size;
struct mlx5_vhca_data_buffer *buf_header; u8 num_ready_chunks;
/* Upon chunk mode preserve another set of buffers for stop_copy phase */
struct mlx5_vhca_data_buffer *buf[MAX_NUM_CHUNKS];
struct mlx5_vhca_data_buffer *buf_header[MAX_NUM_CHUNKS];
struct mlx5vf_save_work_data save_data[MAX_NUM_CHUNKS];
spinlock_t list_lock; spinlock_t list_lock;
struct list_head buf_list; struct list_head buf_list;
struct list_head avail_list; struct list_head avail_list;
...@@ -164,6 +178,7 @@ struct mlx5vf_pci_core_device { ...@@ -164,6 +178,7 @@ struct mlx5vf_pci_core_device {
u8 deferred_reset:1; u8 deferred_reset:1;
u8 mdev_detach:1; u8 mdev_detach:1;
u8 log_active:1; u8 log_active:1;
u8 chunk_mode:1;
struct completion tracker_comp; struct completion tracker_comp;
/* protect migration state */ /* protect migration state */
struct mutex state_mutex; struct mutex state_mutex;
...@@ -186,7 +201,8 @@ enum { ...@@ -186,7 +201,8 @@ enum {
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size, u8 query_flags); size_t *state_size, u64 *total_size,
u8 query_flags);
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
const struct vfio_migration_ops *mig_ops, const struct vfio_migration_ops *mig_ops,
const struct vfio_log_ops *log_ops); const struct vfio_log_ops *log_ops);
...@@ -217,6 +233,8 @@ struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, ...@@ -217,6 +233,8 @@ struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
u8 chunk_num, size_t next_required_umem_size);
int mlx5vf_start_page_tracker(struct vfio_device *vdev, int mlx5vf_start_page_tracker(struct vfio_device *vdev,
struct rb_root_cached *ranges, u32 nnodes, u64 *page_size); struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
int mlx5vf_stop_page_tracker(struct vfio_device *vdev); int mlx5vf_stop_page_tracker(struct vfio_device *vdev);
......
This diff is collapsed.
...@@ -946,6 +946,11 @@ void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, ...@@ -946,6 +946,11 @@ void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
unsigned long last; unsigned long last;
comb_start = interval_tree_iter_first(root, 0, ULONG_MAX); comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);
/* Empty list */
if (WARN_ON_ONCE(!comb_start))
return;
curr = comb_start; curr = comb_start;
while (curr) { while (curr) {
last = curr->last; last = curr->last;
...@@ -975,6 +980,11 @@ void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, ...@@ -975,6 +980,11 @@ void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
prev = curr; prev = curr;
curr = interval_tree_iter_next(curr, 0, ULONG_MAX); curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
} }
/* Empty list or no nodes to combine */
if (WARN_ON_ONCE(min_gap == ULONG_MAX))
break;
comb_start->last = comb_end->last; comb_start->last = comb_end->last;
interval_tree_remove(comb_end, root); interval_tree_remove(comb_end, root);
cur_nodes--; cur_nodes--;
......
...@@ -21,11 +21,13 @@ ...@@ -21,11 +21,13 @@
struct cdx_controller; struct cdx_controller;
enum { enum {
CDX_DEV_BUS_MASTER_CONF,
CDX_DEV_RESET_CONF, CDX_DEV_RESET_CONF,
}; };
struct cdx_device_config { struct cdx_device_config {
u8 type; u8 type;
bool bus_master_enable;
}; };
typedef int (*cdx_scan_cb)(struct cdx_controller *cdx); typedef int (*cdx_scan_cb)(struct cdx_controller *cdx);
...@@ -170,4 +172,20 @@ extern struct bus_type cdx_bus_type; ...@@ -170,4 +172,20 @@ extern struct bus_type cdx_bus_type;
*/ */
int cdx_dev_reset(struct device *dev); int cdx_dev_reset(struct device *dev);
/**
* cdx_set_master - enables bus-mastering for CDX device
* @cdx_dev: the CDX device to enable
*
* Return: 0 for success, -errno on failure
*/
int cdx_set_master(struct cdx_device *cdx_dev);
/**
* cdx_clear_master - disables bus-mastering for CDX device
* @cdx_dev: the CDX device to disable
*
* Return: 0 for success, -errno on failure
*/
int cdx_clear_master(struct cdx_device *cdx_dev);
#endif /* _CDX_BUS_H_ */ #endif /* _CDX_BUS_H_ */
...@@ -277,8 +277,8 @@ struct vfio_region_info { ...@@ -277,8 +277,8 @@ struct vfio_region_info {
#define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */ #define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */
__u32 index; /* Region index */ __u32 index; /* Region index */
__u32 cap_offset; /* Offset within info struct of first cap */ __u32 cap_offset; /* Offset within info struct of first cap */
__u64 size; /* Region size (bytes) */ __aligned_u64 size; /* Region size (bytes) */
__u64 offset; /* Region offset from start of device fd */ __aligned_u64 offset; /* Region offset from start of device fd */
}; };
#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) #define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8)
...@@ -294,8 +294,8 @@ struct vfio_region_info { ...@@ -294,8 +294,8 @@ struct vfio_region_info {
#define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1 #define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1
struct vfio_region_sparse_mmap_area { struct vfio_region_sparse_mmap_area {
__u64 offset; /* Offset of mmap'able area within region */ __aligned_u64 offset; /* Offset of mmap'able area within region */
__u64 size; /* Size of mmap'able area */ __aligned_u64 size; /* Size of mmap'able area */
}; };
struct vfio_region_info_cap_sparse_mmap { struct vfio_region_info_cap_sparse_mmap {
...@@ -450,9 +450,9 @@ struct vfio_device_migration_info { ...@@ -450,9 +450,9 @@ struct vfio_device_migration_info {
VFIO_DEVICE_STATE_V1_RESUMING) VFIO_DEVICE_STATE_V1_RESUMING)
__u32 reserved; __u32 reserved;
__u64 pending_bytes; __aligned_u64 pending_bytes;
__u64 data_offset; __aligned_u64 data_offset;
__u64 data_size; __aligned_u64 data_size;
}; };
/* /*
...@@ -476,7 +476,7 @@ struct vfio_device_migration_info { ...@@ -476,7 +476,7 @@ struct vfio_device_migration_info {
struct vfio_region_info_cap_nvlink2_ssatgt { struct vfio_region_info_cap_nvlink2_ssatgt {
struct vfio_info_cap_header header; struct vfio_info_cap_header header;
__u64 tgt; __aligned_u64 tgt;
}; };
/* /*
...@@ -816,7 +816,7 @@ struct vfio_device_gfx_plane_info { ...@@ -816,7 +816,7 @@ struct vfio_device_gfx_plane_info {
__u32 drm_plane_type; /* type of plane: DRM_PLANE_TYPE_* */ __u32 drm_plane_type; /* type of plane: DRM_PLANE_TYPE_* */
/* out */ /* out */
__u32 drm_format; /* drm format of plane */ __u32 drm_format; /* drm format of plane */
__u64 drm_format_mod; /* tiled mode */ __aligned_u64 drm_format_mod; /* tiled mode */
__u32 width; /* width of plane */ __u32 width; /* width of plane */
__u32 height; /* height of plane */ __u32 height; /* height of plane */
__u32 stride; /* stride of plane */ __u32 stride; /* stride of plane */
...@@ -829,6 +829,7 @@ struct vfio_device_gfx_plane_info { ...@@ -829,6 +829,7 @@ struct vfio_device_gfx_plane_info {
__u32 region_index; /* region index */ __u32 region_index; /* region index */
__u32 dmabuf_id; /* dma-buf id */ __u32 dmabuf_id; /* dma-buf id */
}; };
__u32 reserved;
}; };
#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14) #define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14)
...@@ -863,9 +864,10 @@ struct vfio_device_ioeventfd { ...@@ -863,9 +864,10 @@ struct vfio_device_ioeventfd {
#define VFIO_DEVICE_IOEVENTFD_32 (1 << 2) /* 4-byte write */ #define VFIO_DEVICE_IOEVENTFD_32 (1 << 2) /* 4-byte write */
#define VFIO_DEVICE_IOEVENTFD_64 (1 << 3) /* 8-byte write */ #define VFIO_DEVICE_IOEVENTFD_64 (1 << 3) /* 8-byte write */
#define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf) #define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf)
__u64 offset; /* device fd offset of write */ __aligned_u64 offset; /* device fd offset of write */
__u64 data; /* data to be written */ __aligned_u64 data; /* data to be written */
__s32 fd; /* -1 for de-assignment */ __s32 fd; /* -1 for de-assignment */
__u32 reserved;
}; };
#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
...@@ -1434,6 +1436,27 @@ struct vfio_device_feature_mig_data_size { ...@@ -1434,6 +1436,27 @@ struct vfio_device_feature_mig_data_size {
#define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9 #define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9
/**
* Upon VFIO_DEVICE_FEATURE_SET, set or clear bus mastering for the device
* based on the operation specified in the op field.
*
* This feature is provided for devices that need bus master control but whose
* in-band device interface lacks support for it. Consequently, it is not
* applicable to PCI devices, as bus master control for PCI devices is managed
* in-band through the configuration space. At present, this feature is supported
* only for CDX devices.
*
* When the device's bus master setting is configured as CLEAR, all incoming DMA
* requests from the device are blocked. Conversely, configuring the device's
* bus master setting as SET (enable) grants the device the capability to
* perform DMA to host memory.
*/
struct vfio_device_feature_bus_master {
__u32 op; /* Requested operation: one of the *_MASTER values defined below */
#define VFIO_DEVICE_FEATURE_CLEAR_MASTER 0 /* Clear Bus Master */
#define VFIO_DEVICE_FEATURE_SET_MASTER 1 /* Set Bus Master */
};
/* Feature number identifying the bus master control feature */
#define VFIO_DEVICE_FEATURE_BUS_MASTER 10
/* -------- API for Type1 VFIO IOMMU -------- */ /* -------- API for Type1 VFIO IOMMU -------- */
/** /**
...@@ -1449,7 +1472,7 @@ struct vfio_iommu_type1_info { ...@@ -1449,7 +1472,7 @@ struct vfio_iommu_type1_info {
__u32 flags; __u32 flags;
#define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ #define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */
__u64 iova_pgsizes; /* Bitmap of supported page sizes */ __aligned_u64 iova_pgsizes; /* Bitmap of supported page sizes */
__u32 cap_offset; /* Offset within info struct of first cap */ __u32 cap_offset; /* Offset within info struct of first cap */
__u32 pad; __u32 pad;
}; };
......
...@@ -1262,7 +1262,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd, ...@@ -1262,7 +1262,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd,
case VFIO_DEVICE_QUERY_GFX_PLANE: case VFIO_DEVICE_QUERY_GFX_PLANE:
{ {
struct vfio_device_gfx_plane_info plane; struct vfio_device_gfx_plane_info plane = {};
minsz = offsetofend(struct vfio_device_gfx_plane_info, minsz = offsetofend(struct vfio_device_gfx_plane_info,
region_index); region_index);
......
...@@ -591,7 +591,7 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd, ...@@ -591,7 +591,7 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd,
case VFIO_DEVICE_QUERY_GFX_PLANE: case VFIO_DEVICE_QUERY_GFX_PLANE:
{ {
struct vfio_device_gfx_plane_info plane; struct vfio_device_gfx_plane_info plane = {};
minsz = offsetofend(struct vfio_device_gfx_plane_info, minsz = offsetofend(struct vfio_device_gfx_plane_info,
region_index); region_index);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment