Commit 79a463be authored by Xie Yongji, committed by Michael S. Tsirkin

vduse: Support registering userspace memory for IOVA regions

Introduce two ioctls: VDUSE_IOTLB_REG_UMEM and
VDUSE_IOTLB_DEREG_UMEM to support registering
and de-registering userspace memory for IOVA
regions.

For now, registering userspace memory is only supported
for the bounce buffer region in the virtio-vdpa case.
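
A minimal userspace sketch of how the new ioctls might be used (illustrative
only, not part of this patch; dev_fd is assumed to be an open /dev/vduse/$NAME
file descriptor and bounce_size is assumed to equal the device's bounce buffer
size, since the kernel rejects any other iova/size combination):

  #include <stdint.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <linux/vduse.h>

  /*
   * Hypothetical helper: back the bounce buffer IOVA region (iova 0)
   * with an anonymous, page-aligned buffer.  dev_fd and bounce_size
   * are assumptions, see above.
   */
  static int register_bounce_umem(int dev_fd, size_t bounce_size)
  {
          struct vduse_iova_umem umem;
          void *buf;

          /* mmap() returns page-aligned memory, as required for uaddr */
          buf = mmap(NULL, bounce_size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
          if (buf == MAP_FAILED)
                  return -1;

          memset(&umem, 0, sizeof(umem));  /* reserved[] must be zero */
          umem.uaddr = (uintptr_t)buf;
          umem.iova = 0;                   /* only the bounce region is supported */
          umem.size = bounce_size;

          if (ioctl(dev_fd, VDUSE_IOTLB_REG_UMEM, &umem) < 0) {
                  munmap(buf, bounce_size);
                  return -1;
          }
          return 0;
  }

De-registration passes the same iova and size through VDUSE_IOTLB_DEREG_UMEM;
the registration is also torn down automatically when the device fd is
released.
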
Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20220803045523.23851-5-xieyongji@bytedance.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
parent 6c77ed22
@@ -21,6 +21,8 @@
 #include <linux/uio.h>
 #include <linux/vdpa.h>
 #include <linux/nospec.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/mm.h>
 #include <uapi/linux/vduse.h>
 #include <uapi/linux/vdpa.h>
 #include <uapi/linux/virtio_config.h>
@@ -64,6 +66,13 @@ struct vduse_vdpa {
        struct vduse_dev *dev;
 };
 
+struct vduse_umem {
+       unsigned long iova;
+       unsigned long npages;
+       struct page **pages;
+       struct mm_struct *mm;
+};
+
 struct vduse_dev {
        struct vduse_vdpa *vdev;
        struct device *dev;
@@ -95,6 +104,8 @@ struct vduse_dev {
        u8 status;
        u32 vq_num;
        u32 vq_align;
+       struct vduse_umem *umem;
+       struct mutex mem_lock;
 };
 
 struct vduse_dev_msg {
@@ -917,6 +928,102 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
        return ret;
 }
 
+static int vduse_dev_dereg_umem(struct vduse_dev *dev,
+                               u64 iova, u64 size)
+{
+       int ret;
+
+       mutex_lock(&dev->mem_lock);
+       ret = -ENOENT;
+       if (!dev->umem)
+               goto unlock;
+
+       ret = -EINVAL;
+       if (dev->umem->iova != iova || size != dev->domain->bounce_size)
+               goto unlock;
+
+       vduse_domain_remove_user_bounce_pages(dev->domain);
+       unpin_user_pages_dirty_lock(dev->umem->pages,
+                                   dev->umem->npages, true);
+       atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
+       mmdrop(dev->umem->mm);
+       vfree(dev->umem->pages);
+       kfree(dev->umem);
+       dev->umem = NULL;
+       ret = 0;
+unlock:
+       mutex_unlock(&dev->mem_lock);
+       return ret;
+}
+
+static int vduse_dev_reg_umem(struct vduse_dev *dev,
+                             u64 iova, u64 uaddr, u64 size)
+{
+       struct page **page_list = NULL;
+       struct vduse_umem *umem = NULL;
+       long pinned = 0;
+       unsigned long npages, lock_limit;
+       int ret;
+
+       if (!dev->domain->bounce_map ||
+           size != dev->domain->bounce_size ||
+           iova != 0 || uaddr & ~PAGE_MASK)
+               return -EINVAL;
+
+       mutex_lock(&dev->mem_lock);
+       ret = -EEXIST;
+       if (dev->umem)
+               goto unlock;
+
+       ret = -ENOMEM;
+       npages = size >> PAGE_SHIFT;
+       page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
+                             GFP_KERNEL_ACCOUNT);
+       umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+       if (!page_list || !umem)
+               goto unlock;
+
+       mmap_read_lock(current->mm);
+
+       lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
+       if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
+               goto out;
+
+       pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
+                               page_list, NULL);
+       if (pinned != npages) {
+               ret = pinned < 0 ? pinned : -ENOMEM;
+               goto out;
+       }
+
+       ret = vduse_domain_add_user_bounce_pages(dev->domain,
+                                                page_list, pinned);
+       if (ret)
+               goto out;
+
+       atomic64_add(npages, &current->mm->pinned_vm);
+
+       umem->pages = page_list;
+       umem->npages = pinned;
+       umem->iova = iova;
+       umem->mm = current->mm;
+       mmgrab(current->mm);
+
+       dev->umem = umem;
+out:
+       if (ret && pinned > 0)
+               unpin_user_pages(page_list, pinned);
+
+       mmap_read_unlock(current->mm);
+unlock:
+       if (ret) {
+               vfree(page_list);
+               kfree(umem);
+       }
+       mutex_unlock(&dev->mem_lock);
+       return ret;
+}
+
 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                            unsigned long arg)
 {
@@ -1089,6 +1196,38 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
                break;
        }
+       case VDUSE_IOTLB_REG_UMEM: {
+               struct vduse_iova_umem umem;
+
+               ret = -EFAULT;
+               if (copy_from_user(&umem, argp, sizeof(umem)))
+                       break;
+
+               ret = -EINVAL;
+               if (!is_mem_zero((const char *)umem.reserved,
+                                sizeof(umem.reserved)))
+                       break;
+
+               ret = vduse_dev_reg_umem(dev, umem.iova,
+                                        umem.uaddr, umem.size);
+               break;
+       }
+       case VDUSE_IOTLB_DEREG_UMEM: {
+               struct vduse_iova_umem umem;
+
+               ret = -EFAULT;
+               if (copy_from_user(&umem, argp, sizeof(umem)))
+                       break;
+
+               ret = -EINVAL;
+               if (!is_mem_zero((const char *)umem.reserved,
+                                sizeof(umem.reserved)))
+                       break;
+
+               ret = vduse_dev_dereg_umem(dev, umem.iova,
+                                          umem.size);
+               break;
+       }
        default:
                ret = -ENOIOCTLCMD;
                break;
@@ -1101,6 +1240,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
 {
        struct vduse_dev *dev = file->private_data;
 
+       vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
        spin_lock(&dev->msg_lock);
        /* Make sure the inflight messages can processed after reconncection */
        list_splice_init(&dev->recv_list, &dev->send_list);
@@ -1163,6 +1303,7 @@ static struct vduse_dev *vduse_dev_create(void)
                return NULL;
 
        mutex_init(&dev->lock);
+       mutex_init(&dev->mem_lock);
        spin_lock_init(&dev->msg_lock);
        INIT_LIST_HEAD(&dev->send_list);
        INIT_LIST_HEAD(&dev->recv_list);
...
@@ -210,6 +210,29 @@ struct vduse_vq_eventfd {
  */
 #define VDUSE_VQ_INJECT_IRQ    _IOW(VDUSE_BASE, 0x17, __u32)
 
+/**
+ * struct vduse_iova_umem - userspace memory configuration for one IOVA region
+ * @uaddr: start address of userspace memory, it must be aligned to page size
+ * @iova: start of the IOVA region
+ * @size: size of the IOVA region
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
+ * ioctls to register/de-register userspace memory for IOVA regions
+ */
+struct vduse_iova_umem {
+       __u64 uaddr;
+       __u64 iova;
+       __u64 size;
+       __u64 reserved[3];
+};
+
+/* Register userspace memory for IOVA regions */
+#define VDUSE_IOTLB_REG_UMEM   _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem)
+
+/* De-register the userspace memory. Caller should set iova and size field. */
+#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem)
+
 /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
 
 /**
...