Commit 67135f29 authored by Yishai Hadas, committed by Alex Williamson

vfio/mlx5: Add support for SAVING in chunk mode

Add support for SAVING in chunk mode. This includes running a work
item that fills the next chunk from the device.

If the number of ready chunks reaches MAX_NUM_CHUNKS, SAVING of the
next chunk is delayed until the reader consumes one chunk.

The next patch in the series adds the reader side of chunk mode.
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20230911093856.81910-8-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 5798e4dd
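
For orientation before the diff, here is a minimal sketch of the chunk throttling described in the commit message. This is not the driver code: the demo_* names are made up, and the locking, work queue, and buffer management used by the real save callback are omitted.

#include <stdbool.h>
#include <stddef.h>

#define DEMO_MAX_NUM_CHUNKS 2	/* mirrors MAX_NUM_CHUNKS in the patch */

struct demo_migf {
	unsigned int num_ready_chunks;	/* chunks queued for the reader */
	size_t next_required_umem_size;	/* deferred next-chunk size, 0 = none */
};

/*
 * The device reported a chunk as ready. Returns true when the next SAVE
 * may be scheduled immediately, false when it must wait for the reader.
 */
static bool demo_chunk_ready(struct demo_migf *migf, size_t next_size)
{
	migf->num_ready_chunks++;
	if (next_size && migf->num_ready_chunks >= DEMO_MAX_NUM_CHUNKS) {
		/* Enough chunks are pending: remember the size and defer. */
		migf->next_required_umem_size = next_size;
		return false;
	}
	return true;
}

/*
 * The reader consumed one chunk. Returns a non-zero size when a deferred
 * SAVE should be issued now.
 */
static size_t demo_chunk_consumed(struct demo_migf *migf)
{
	size_t deferred = migf->next_required_umem_size;

	migf->num_ready_chunks--;
	migf->next_required_umem_size = 0;
	return deferred;
}
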
@@ -435,6 +435,7 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
spin_lock_irq(&buf->migf->list_lock);
buf->stop_copy_chunk_num = 0;
list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
spin_unlock_irq(&buf->migf->list_lock);
}
@@ -551,6 +552,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
struct mlx5_vf_migration_file, async_data);
if (!status) {
size_t next_required_umem_size = 0;
bool stop_copy_last_chunk;
size_t image_size;
unsigned long flags;
bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
@@ -558,6 +561,11 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
image_size = MLX5_GET(save_vhca_state_out, async_data->out,
actual_image_size);
if (async_data->buf->stop_copy_chunk_num)
next_required_umem_size = MLX5_GET(save_vhca_state_out,
async_data->out, next_required_umem_size);
stop_copy_last_chunk = async_data->stop_copy_chunk &&
!next_required_umem_size;
if (async_data->header_buf) {
status = add_buf_header(async_data->header_buf, image_size,
initial_pre_copy);
@@ -569,12 +577,28 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
migf->max_pos += async_data->buf->length;
spin_lock_irqsave(&migf->list_lock, flags);
list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
if (async_data->buf->stop_copy_chunk_num) {
migf->num_ready_chunks++;
if (next_required_umem_size &&
migf->num_ready_chunks >= MAX_NUM_CHUNKS) {
/* Delay the next SAVE till one chunk be consumed */
migf->next_required_umem_size = next_required_umem_size;
next_required_umem_size = 0;
}
}
spin_unlock_irqrestore(&migf->list_lock, flags);
if (initial_pre_copy)
if (initial_pre_copy) {
migf->pre_copy_initial_bytes += image_size;
migf->state = async_data->stop_copy_chunk ?
MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY;
migf->state = MLX5_MIGF_STATE_PRE_COPY;
}
if (stop_copy_last_chunk)
migf->state = MLX5_MIGF_STATE_COMPLETE;
wake_up_interruptible(&migf->poll_wait);
if (next_required_umem_size)
mlx5vf_mig_file_set_save_work(migf,
/* Picking up the next chunk num */
(async_data->buf->stop_copy_chunk_num % MAX_NUM_CHUNKS) + 1,
next_required_umem_size);
mlx5vf_save_callback_complete(migf, async_data);
return;
}
@@ -632,10 +656,15 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
}
if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
if (async_data->stop_copy_chunk && migf->buf_header[0]) {
header_buf = migf->buf_header[0];
migf->buf_header[0] = NULL;
} else {
if (async_data->stop_copy_chunk) {
u8 header_idx = buf->stop_copy_chunk_num ?
buf->stop_copy_chunk_num - 1 : 0;
header_buf = migf->buf_header[header_idx];
migf->buf_header[header_idx] = NULL;
}
if (!header_buf) {
header_buf = mlx5vf_get_data_buffer(migf,
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
if (IS_ERR(header_buf)) {
@@ -83,6 +83,13 @@ struct mlx5vf_async_data {
void *out;
};
struct mlx5vf_save_work_data {
struct mlx5_vf_migration_file *migf;
size_t next_required_umem_size;
struct work_struct work;
u8 chunk_num;
};
#define MAX_NUM_CHUNKS 2
struct mlx5_vf_migration_file {
@@ -97,9 +104,12 @@ struct mlx5_vf_migration_file {
u32 record_tag;
u64 stop_copy_prep_size;
u64 pre_copy_initial_bytes;
size_t next_required_umem_size;
u8 num_ready_chunks;
/* Upon chunk mode preserve another set of buffers for stop_copy phase */
struct mlx5_vhca_data_buffer *buf[MAX_NUM_CHUNKS];
struct mlx5_vhca_data_buffer *buf_header[MAX_NUM_CHUNKS];
struct mlx5vf_save_work_data save_data[MAX_NUM_CHUNKS];
spinlock_t list_lock;
struct list_head buf_list;
struct list_head avail_list;
@@ -223,6 +233,8 @@ struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
u8 chunk_num, size_t next_required_umem_size);
int mlx5vf_start_page_tracker(struct vfio_device *vdev,
struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
int mlx5vf_stop_page_tracker(struct vfio_device *vdev);
@@ -306,6 +306,73 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf)
wake_up_interruptible(&migf->poll_wait);
}
void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
u8 chunk_num, size_t next_required_umem_size)
{
migf->save_data[chunk_num - 1].next_required_umem_size =
next_required_umem_size;
migf->save_data[chunk_num - 1].migf = migf;
get_file(migf->filp);
queue_work(migf->mvdev->cb_wq,
&migf->save_data[chunk_num - 1].work);
}
static struct mlx5_vhca_data_buffer *
mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf,
u8 index, size_t required_length)
{
struct mlx5_vhca_data_buffer *buf = migf->buf[index];
u8 chunk_num;
WARN_ON(!buf);
chunk_num = buf->stop_copy_chunk_num;
buf->migf->buf[index] = NULL;
/* Checking whether the pre-allocated buffer can fit */
if (buf->allocated_length >= required_length)
return buf;
mlx5vf_put_data_buffer(buf);
buf = mlx5vf_get_data_buffer(buf->migf, required_length,
DMA_FROM_DEVICE);
if (IS_ERR(buf))
return buf;
buf->stop_copy_chunk_num = chunk_num;
return buf;
}
static void mlx5vf_mig_file_save_work(struct work_struct *_work)
{
struct mlx5vf_save_work_data *save_data = container_of(_work,
struct mlx5vf_save_work_data, work);
struct mlx5_vf_migration_file *migf = save_data->migf;
struct mlx5vf_pci_core_device *mvdev = migf->mvdev;
struct mlx5_vhca_data_buffer *buf;
mutex_lock(&mvdev->state_mutex);
if (migf->state == MLX5_MIGF_STATE_ERROR)
goto end;
buf = mlx5vf_mig_file_get_stop_copy_buf(migf,
save_data->chunk_num - 1,
save_data->next_required_umem_size);
if (IS_ERR(buf))
goto err;
if (mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false))
goto err_save;
goto end;
err_save:
mlx5vf_put_data_buffer(buf);
err:
mlx5vf_mark_err(migf);
end:
mlx5vf_state_mutex_unlock(mvdev);
fput(migf->filp);
}
static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf,
bool track)
{
@@ -400,6 +467,9 @@ static int mlx5vf_prep_stop_copy(struct mlx5vf_pci_core_device *mvdev,
if (mvdev->chunk_mode) {
migf->buf[i]->stop_copy_chunk_num = i + 1;
migf->buf_header[i]->stop_copy_chunk_num = i + 1;
INIT_WORK(&migf->save_data[i].work,
mlx5vf_mig_file_save_work);
migf->save_data[i].chunk_num = i + 1;
}
}
@@ -548,16 +618,10 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
if (ret)
goto err;
/* Checking whether we have a matching pre-allocated buffer that can fit */
if (migf->buf[0]->allocated_length >= length) {
buf = migf->buf[0];
migf->buf[0] = NULL;
} else {
buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto err;
}
buf = mlx5vf_mig_file_get_stop_copy_buf(migf, 0, length);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto err;
}
ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false);
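
As a closing note, the buffer handling in mlx5vf_mig_file_get_stop_copy_buf() above follows a reuse-or-reallocate pattern: hand out the pre-allocated per-chunk buffer when it is large enough, otherwise swap it for a bigger one while carrying the chunk number over. A hedged, self-contained sketch of that decision only, with hypothetical demo_* names and malloc/free standing in for the driver's buffer pool:

#include <stdlib.h>
#include <stddef.h>

/* Hypothetical, simplified stand-in for the driver's data buffer. */
struct demo_buf {
	size_t allocated_length;
	unsigned char chunk_num;
};

/*
 * Keep the pre-allocated buffer when it can hold the required length,
 * otherwise replace it with a larger one, preserving the chunk number.
 * Returns NULL when no buffer could be obtained.
 */
static struct demo_buf *demo_get_chunk_buf(struct demo_buf *prealloc,
					   size_t required_length)
{
	struct demo_buf *buf;

	if (prealloc->allocated_length >= required_length)
		return prealloc;		/* big enough, reuse as-is */

	buf = malloc(sizeof(*buf));
	if (!buf) {
		free(prealloc);
		return NULL;
	}
	buf->allocated_length = required_length;
	buf->chunk_num = prealloc->chunk_num;	/* carry the chunk number over */
	free(prealloc);				/* release the smaller buffer */
	return buf;
}
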