Commit 34e2f271 authored by Yishai Hadas, committed by Alex Williamson

vfio/mlx5: Introduce multiple loads

In order to support PRE_COPY, the mlx5 driver transfers multiple states
(images) of the device, e.g. the source VF can save and transfer
multiple states, and the target VF will load them in that order.

This patch implements the changes for the target VF to decompose the
header for each state and to write and load multiple states.
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-13-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 81156c27
...@@ -598,9 +598,11 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -598,9 +598,11 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
if (mvdev->mdev_detach) if (mvdev->mdev_detach)
return -ENOTCONN; return -ENOTCONN;
if (!buf->dmaed) {
err = mlx5vf_dma_data_buffer(buf); err = mlx5vf_dma_data_buffer(buf);
if (err) if (err)
return err; return err;
}
MLX5_SET(load_vhca_state_in, in, opcode, MLX5_SET(load_vhca_state_in, in, opcode,
MLX5_CMD_OP_LOAD_VHCA_STATE); MLX5_CMD_OP_LOAD_VHCA_STATE);
...@@ -644,6 +646,11 @@ void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf) ...@@ -644,6 +646,11 @@ void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
migf->buf = NULL; migf->buf = NULL;
} }
if (migf->buf_header) {
mlx5vf_free_data_buffer(migf->buf_header);
migf->buf_header = NULL;
}
list_splice(&migf->avail_list, &migf->buf_list); list_splice(&migf->avail_list, &migf->buf_list);
while ((entry = list_first_entry_or_null(&migf->buf_list, while ((entry = list_first_entry_or_null(&migf->buf_list,
......
...@@ -22,6 +22,14 @@ enum mlx5_vf_migf_state { ...@@ -22,6 +22,14 @@ enum mlx5_vf_migf_state {
MLX5_MIGF_STATE_COMPLETE, MLX5_MIGF_STATE_COMPLETE,
}; };
/*
 * Steps of the resume (write) path state machine kept in
 * mlx5_vf_migration_file::load_state and driven by mlx5vf_resume_write().
 */
enum mlx5_vf_load_state {
/*
 * Initial state when MLX5VF_PRE_COPY_SUPP() is false: written data is
 * appended straight to the data buffer, no header is parsed.
 */
MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
/*
 * Initial state when MLX5VF_PRE_COPY_SUPP() is true: written data is
 * accumulated in the header buffer until a full
 * struct mlx5_vf_migration_header has been received.
 */
MLX5_VF_LOAD_STATE_READ_HEADER,
/*
 * Header complete: the data buffer is re-allocated if it is smaller than
 * the announced image size and its start_pos is set to the current max_pos.
 */
MLX5_VF_LOAD_STATE_PREP_IMAGE,
/* Written data is appended to the data buffer until image_size is reached. */
MLX5_VF_LOAD_STATE_READ_IMAGE,
/*
 * Image complete: it is handed to mlx5vf_cmd_load_vhca_state(), the buffers
 * are reset and the machine returns to READ_HEADER for the next image.
 */
MLX5_VF_LOAD_STATE_LOAD_IMAGE,
};
struct mlx5_vf_migration_header { struct mlx5_vf_migration_header {
__le64 image_size; __le64 image_size;
/* For future use in case we may need to change the kernel protocol */ /* For future use in case we may need to change the kernel protocol */
...@@ -60,9 +68,11 @@ struct mlx5_vf_migration_file { ...@@ -60,9 +68,11 @@ struct mlx5_vf_migration_file {
struct mutex lock; struct mutex lock;
enum mlx5_vf_migf_state state; enum mlx5_vf_migf_state state;
enum mlx5_vf_load_state load_state;
u32 pdn; u32 pdn;
loff_t max_pos; loff_t max_pos;
struct mlx5_vhca_data_buffer *buf; struct mlx5_vhca_data_buffer *buf;
struct mlx5_vhca_data_buffer *buf_header;
spinlock_t list_lock; spinlock_t list_lock;
struct list_head buf_list; struct list_head buf_list;
struct list_head avail_list; struct list_head avail_list;
......
...@@ -518,13 +518,162 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) ...@@ -518,13 +518,162 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
return ERR_PTR(ret); return ERR_PTR(ret);
} }
/*
 * Copy user data into a single page of @vhca_buf.
 *
 * The destination is derived from *@pos relative to vhca_buf->start_pos; at
 * most the remainder of that page (or *@len, whichever is smaller) is copied.
 * On success the user pointer, remaining length, file position, running
 * total and vhca_buf->length are all advanced by the number of bytes copied.
 *
 * Return: 0 on success, -EINVAL if no page backs the target offset,
 * -EFAULT if the user copy did not complete.
 */
static int
mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
			      const char __user **buf, size_t *len,
			      loff_t *pos, ssize_t *done)
{
	unsigned long rel_off = *pos - vhca_buf->start_pos;
	size_t in_page = rel_off % PAGE_SIZE;
	struct page *pg;
	size_t chunk;
	u8 *kaddr;
	int not_copied;

	/* Look the page up by its page-aligned offset inside the buffer. */
	pg = mlx5vf_get_migration_page(vhca_buf, rel_off - in_page);
	if (!pg)
		return -EINVAL;

	/* Never cross the page boundary in one call. */
	chunk = min_t(size_t, *len, PAGE_SIZE - in_page);

	kaddr = kmap_local_page(pg);
	not_copied = copy_from_user(kaddr + in_page, *buf, chunk);
	kunmap_local(kaddr);
	if (not_copied)
		return -EFAULT;

	*buf += chunk;
	*len -= chunk;
	*pos += chunk;
	*done += chunk;
	vhca_buf->length += chunk;
	return 0;
}
/*
 * Header-less resume path: append all of the user data to @vhca_buf.
 *
 * The buffer is grown first when @requested_length exceeds what is
 * currently allocated; the data is then copied page by page until *@len
 * reaches zero.
 *
 * Return: 0 on success, -ENOMEM if @requested_length is larger than
 * MAX_MIGRATION_SIZE, otherwise the error returned by
 * mlx5vf_add_migration_pages() or mlx5vf_append_page_to_mig_buf().
 */
static int
mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
				   loff_t requested_length,
				   const char __user **buf, size_t *len,
				   loff_t *pos, ssize_t *done)
{
	int ret = 0;

	if (requested_length > MAX_MIGRATION_SIZE)
		return -ENOMEM;

	if (requested_length > vhca_buf->allocated_length) {
		/* Add just enough whole pages to cover the missing bytes. */
		ret = mlx5vf_add_migration_pages(
			vhca_buf,
			DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
				     PAGE_SIZE));
		if (ret)
			return ret;
	}

	/* Each call consumes at most one page; stop on the first error. */
	while (*len && !ret)
		ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
						    done);

	return ret;
}
/*
 * Append user data to @vhca_buf, up to the bytes still missing from an image
 * of @image_size bytes.
 *
 * No more than *@len bytes are consumed, and *@len is reduced by the amount
 * copied. Once the buffer holds the full image, the load state moves to
 * MLX5_VF_LOAD_STATE_LOAD_IMAGE, migf->max_pos is advanced by @image_size
 * and *@has_work is set so the caller runs another iteration.
 *
 * Return: 0 on success or the error from mlx5vf_append_page_to_mig_buf().
 */
static ssize_t
mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
			 struct mlx5_vhca_data_buffer *vhca_buf,
			 size_t image_size, const char __user **buf,
			 size_t *len, loff_t *pos, ssize_t *done,
			 bool *has_work)
{
	size_t budget = min_t(size_t, *len, image_size - vhca_buf->length);
	size_t left = budget;
	int err;

	while (left) {
		err = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &left, pos,
						    done);
		if (err)
			return err;
	}
	*len -= budget;

	/* Image still incomplete: wait for more data from user space. */
	if (vhca_buf->length != image_size)
		return 0;

	migf->load_state = MLX5_VF_LOAD_STATE_LOAD_IMAGE;
	migf->max_pos += image_size;
	*has_work = true;
	return 0;
}
/*
 * Accumulate a struct mlx5_vf_migration_header in the first page of
 * @vhca_buf and validate it once it is complete.
 *
 * Only the bytes still missing from the header are consumed from the user
 * buffer. When the header is complete, its image size is stored in
 * vhca_buf->header_image_size, the load state moves to
 * MLX5_VF_LOAD_STATE_PREP_IMAGE, migf->max_pos is advanced by the header
 * length and *@has_work is set.
 *
 * Return: 0 on success (including a still-partial header), -EINVAL if the
 * buffer has no first page, -EFAULT if the user copy failed, -ENOMEM if the
 * announced image size exceeds MAX_MIGRATION_SIZE, -EOPNOTSUPP if any
 * header flag is set.
 */
static int
mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
			  struct mlx5_vhca_data_buffer *vhca_buf,
			  const char __user **buf,
			  size_t *len, loff_t *pos,
			  ssize_t *done, bool *has_work)
{
	size_t hdr_size = sizeof(struct mlx5_vf_migration_header);
	struct page *pg;
	size_t chunk;
	u8 *kaddr;
	u64 flags;
	int ret;

	chunk = min_t(size_t, *len, hdr_size - vhca_buf->length);

	pg = mlx5vf_get_migration_page(vhca_buf, 0);
	if (!pg)
		return -EINVAL;

	kaddr = kmap_local_page(pg);
	if (copy_from_user(kaddr + vhca_buf->length, *buf, chunk)) {
		ret = -EFAULT;
		goto out_unmap;
	}
	ret = 0;

	*buf += chunk;
	*len -= chunk;
	*pos += chunk;
	*done += chunk;
	vhca_buf->length += chunk;

	/* Partial header: nothing to validate until the rest arrives. */
	if (vhca_buf->length != hdr_size)
		goto out_unmap;

	/* The image size is the first little-endian 64-bit field. */
	vhca_buf->header_image_size = le64_to_cpup((__le64 *)kaddr);
	if (vhca_buf->header_image_size > MAX_MIGRATION_SIZE) {
		ret = -ENOMEM;
		goto out_unmap;
	}

	flags = le64_to_cpup((__le64 *)(kaddr +
			offsetof(struct mlx5_vf_migration_header, flags)));
	if (flags) {
		/* Any set flag is rejected. */
		ret = -EOPNOTSUPP;
		goto out_unmap;
	}

	migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
	migf->max_pos += vhca_buf->length;
	*has_work = true;

out_unmap:
	kunmap_local(kaddr);
	return ret;
}
static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
size_t len, loff_t *pos) size_t len, loff_t *pos)
{ {
struct mlx5_vf_migration_file *migf = filp->private_data; struct mlx5_vf_migration_file *migf = filp->private_data;
struct mlx5_vhca_data_buffer *vhca_buf = migf->buf; struct mlx5_vhca_data_buffer *vhca_buf = migf->buf;
struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header;
loff_t requested_length; loff_t requested_length;
bool has_work = false;
ssize_t done = 0; ssize_t done = 0;
int ret = 0;
if (pos) if (pos)
return -ESPIPE; return -ESPIPE;
...@@ -534,56 +683,83 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, ...@@ -534,56 +683,83 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
check_add_overflow((loff_t)len, *pos, &requested_length)) check_add_overflow((loff_t)len, *pos, &requested_length))
return -EINVAL; return -EINVAL;
if (requested_length > MAX_MIGRATION_SIZE) mutex_lock(&migf->mvdev->state_mutex);
return -ENOMEM;
mutex_lock(&migf->lock); mutex_lock(&migf->lock);
if (migf->state == MLX5_MIGF_STATE_ERROR) { if (migf->state == MLX5_MIGF_STATE_ERROR) {
done = -ENODEV; ret = -ENODEV;
goto out_unlock; goto out_unlock;
} }
if (vhca_buf->allocated_length < requested_length) { while (len || has_work) {
done = mlx5vf_add_migration_pages( has_work = false;
vhca_buf, switch (migf->load_state) {
DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, case MLX5_VF_LOAD_STATE_READ_HEADER:
PAGE_SIZE)); ret = mlx5vf_resume_read_header(migf, vhca_buf_header,
if (done) &buf, &len, pos,
&done, &has_work);
if (ret)
goto out_unlock;
break;
case MLX5_VF_LOAD_STATE_PREP_IMAGE:
{
u64 size = vhca_buf_header->header_image_size;
if (vhca_buf->allocated_length < size) {
mlx5vf_free_data_buffer(vhca_buf);
migf->buf = mlx5vf_alloc_data_buffer(migf,
size, DMA_TO_DEVICE);
if (IS_ERR(migf->buf)) {
ret = PTR_ERR(migf->buf);
migf->buf = NULL;
goto out_unlock; goto out_unlock;
} }
while (len) { vhca_buf = migf->buf;
size_t page_offset;
struct page *page;
size_t page_len;
u8 *to_buff;
int ret;
page_offset = (*pos) % PAGE_SIZE;
page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset);
if (!page) {
if (done == 0)
done = -EINVAL;
goto out_unlock;
} }
page_len = min_t(size_t, len, PAGE_SIZE - page_offset); vhca_buf->start_pos = migf->max_pos;
to_buff = kmap_local_page(page); migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
ret = copy_from_user(to_buff + page_offset, buf, page_len); break;
kunmap_local(to_buff); }
if (ret) { case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
done = -EFAULT; ret = mlx5vf_resume_read_image_no_header(vhca_buf,
requested_length,
&buf, &len, pos, &done);
if (ret)
goto out_unlock; goto out_unlock;
break;
case MLX5_VF_LOAD_STATE_READ_IMAGE:
ret = mlx5vf_resume_read_image(migf, vhca_buf,
vhca_buf_header->header_image_size,
&buf, &len, pos, &done, &has_work);
if (ret)
goto out_unlock;
break;
case MLX5_VF_LOAD_STATE_LOAD_IMAGE:
ret = mlx5vf_cmd_load_vhca_state(migf->mvdev, migf, vhca_buf);
if (ret)
goto out_unlock;
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
/* prep header buf for next image */
vhca_buf_header->length = 0;
vhca_buf_header->header_image_size = 0;
/* prep data buf for next image */
vhca_buf->length = 0;
break;
default:
break;
} }
*pos += page_len;
len -= page_len;
done += page_len;
buf += page_len;
vhca_buf->length += page_len;
} }
out_unlock: out_unlock:
if (ret)
migf->state = MLX5_MIGF_STATE_ERROR;
mutex_unlock(&migf->lock); mutex_unlock(&migf->lock);
return done; mlx5vf_state_mutex_unlock(migf->mvdev);
return ret ? ret : done;
} }
static const struct file_operations mlx5vf_resume_fops = { static const struct file_operations mlx5vf_resume_fops = {
...@@ -623,12 +799,29 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) ...@@ -623,12 +799,29 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
} }
migf->buf = buf; migf->buf = buf;
if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
buf = mlx5vf_alloc_data_buffer(migf,
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto out_buf;
}
migf->buf_header = buf;
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
} else {
/* Initial state will be to read the image */
migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
}
stream_open(migf->filp->f_inode, migf->filp); stream_open(migf->filp->f_inode, migf->filp);
mutex_init(&migf->lock); mutex_init(&migf->lock);
INIT_LIST_HEAD(&migf->buf_list); INIT_LIST_HEAD(&migf->buf_list);
INIT_LIST_HEAD(&migf->avail_list); INIT_LIST_HEAD(&migf->avail_list);
spin_lock_init(&migf->list_lock); spin_lock_init(&migf->list_lock);
return migf; return migf;
out_buf:
mlx5vf_free_data_buffer(buf);
out_pd: out_pd:
mlx5vf_cmd_dealloc_pd(migf); mlx5vf_cmd_dealloc_pd(migf);
out_free: out_free:
...@@ -728,11 +921,13 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, ...@@ -728,11 +921,13 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
} }
if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
ret = mlx5vf_cmd_load_vhca_state(mvdev, ret = mlx5vf_cmd_load_vhca_state(mvdev,
mvdev->resuming_migf, mvdev->resuming_migf,
mvdev->resuming_migf->buf); mvdev->resuming_migf->buf);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
}
mlx5vf_disable_fds(mvdev); mlx5vf_disable_fds(mvdev);
return NULL; return NULL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment