Commit 06be3029 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio fixes from Michael Tsirkin:
 "Some last-minute fixes that took a while to get ready. They are not
  regressions, but they look safe and seem worth having"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  tools/virtio: handle fallout from folio work
  tools/virtio: fix virtio_test execution
  vhost: remove avail_event arg from vhost_update_avail_event()
  virtio: drop default for virtio-mem
  vdpa: fix use-after-free on vp_vdpa_remove
  virtio-blk: Remove BUG_ON() in virtio_queue_rq()
  virtio-blk: Don't use MAX_DISCARD_SEGMENTS if max_discard_seg is zero
  vhost: fix hung thread due to erroneous iotlb entries
  vduse: Fix returning wrong type in vduse_domain_alloc_iova()
  vdpa/mlx5: add validation for VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command
  vdpa/mlx5: should verify CTRL_VQ feature exists for MQ
  vdpa: factor out vdpa_set_features_unlocked for vdpa internal use
  virtio_console: break out of buf poll on remove
  virtio: document virtio_reset_device
  virtio: acknowledge all features before access
  virtio: unexport virtio_finalize_features
parents aa6f8dcb 3dd7d135
...@@ -76,9 +76,6 @@ struct virtio_blk { ...@@ -76,9 +76,6 @@ struct virtio_blk {
*/ */
refcount_t refs; refcount_t refs;
/* What host tells us, plus 2 for header & tailer. */
unsigned int sg_elems;
/* Ida index - used to track minor number allocations. */ /* Ida index - used to track minor number allocations. */
int index; int index;
...@@ -322,8 +319,6 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -322,8 +319,6 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_status_t status; blk_status_t status;
int err; int err;
BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
status = virtblk_setup_cmd(vblk->vdev, req, vbr); status = virtblk_setup_cmd(vblk->vdev, req, vbr);
if (unlikely(status)) if (unlikely(status))
return status; return status;
...@@ -783,8 +778,6 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -783,8 +778,6 @@ static int virtblk_probe(struct virtio_device *vdev)
/* Prevent integer overflows and honor max vq size */ /* Prevent integer overflows and honor max vq size */
sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);
/* We need extra sg elements at head and tail. */
sg_elems += 2;
vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
if (!vblk) { if (!vblk) {
err = -ENOMEM; err = -ENOMEM;
...@@ -796,7 +789,6 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -796,7 +789,6 @@ static int virtblk_probe(struct virtio_device *vdev)
mutex_init(&vblk->vdev_mutex); mutex_init(&vblk->vdev_mutex);
vblk->vdev = vdev; vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
INIT_WORK(&vblk->config_work, virtblk_config_changed_work); INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
...@@ -853,7 +845,7 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -853,7 +845,7 @@ static int virtblk_probe(struct virtio_device *vdev)
set_disk_ro(vblk->disk, 1); set_disk_ro(vblk->disk, 1);
/* We can handle whatever the host told us to handle. */ /* We can handle whatever the host told us to handle. */
blk_queue_max_segments(q, vblk->sg_elems-2); blk_queue_max_segments(q, sg_elems);
/* No real sector limit. */ /* No real sector limit. */
blk_queue_max_hw_sectors(q, -1U); blk_queue_max_hw_sectors(q, -1U);
...@@ -925,9 +917,15 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -925,9 +917,15 @@ static int virtblk_probe(struct virtio_device *vdev)
virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
&v); &v);
/*
* max_discard_seg == 0 is out of spec but we always
* handled it.
*/
if (!v)
v = sg_elems;
blk_queue_max_discard_segments(q, blk_queue_max_discard_segments(q,
min_not_zero(v, min(v, MAX_DISCARD_SEGMENTS));
MAX_DISCARD_SEGMENTS));
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
} }
......
...@@ -1957,6 +1957,13 @@ static void virtcons_remove(struct virtio_device *vdev) ...@@ -1957,6 +1957,13 @@ static void virtcons_remove(struct virtio_device *vdev)
list_del(&portdev->list); list_del(&portdev->list);
spin_unlock_irq(&pdrvdata_lock); spin_unlock_irq(&pdrvdata_lock);
/* Device is going away, exit any polling for buffers */
virtio_break_device(vdev);
if (use_multiport(portdev))
flush_work(&portdev->control_work);
else
flush_work(&portdev->config_work);
/* Disable interrupts for vqs */ /* Disable interrupts for vqs */
virtio_reset_device(vdev); virtio_reset_device(vdev);
/* Finish up work that's lined up */ /* Finish up work that's lined up */
......
...@@ -1563,11 +1563,27 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) ...@@ -1563,11 +1563,27 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
switch (cmd) { switch (cmd) {
case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
/* This mq feature check aligns with pre-existing userspace
* implementation.
*
* Without it, an untrusted driver could fake a multiqueue config
* request down to a non-mq device that may cause kernel to
* panic due to uninitialized resources for extra vqs. Even with
* a well behaving guest driver, it is not expected to allow
* changing the number of vqs on a non-mq device.
*/
if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
break;
read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
if (read != sizeof(mq)) if (read != sizeof(mq))
break; break;
newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
break;
if (ndev->cur_num_vqs == 2 * newqps) { if (ndev->cur_num_vqs == 2 * newqps) {
status = VIRTIO_NET_OK; status = VIRTIO_NET_OK;
break; break;
...@@ -1897,11 +1913,25 @@ static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) ...@@ -1897,11 +1913,25 @@ static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
return ndev->mvdev.mlx_features; return ndev->mvdev.mlx_features;
} }
static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features) static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
{ {
/* Minimum features to expect */
if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* Double check features combination sent down by the driver.
* Fail invalid features due to absence of the depended feature.
*
* Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
* requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
* By failing the invalid features sent down by untrusted drivers,
* we're assured the assumption made upon is_index_valid() and
* is_ctrl_vq_idx() will not be compromised.
*/
if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
BIT_ULL(VIRTIO_NET_F_MQ))
return -EINVAL;
return 0; return 0;
} }
...@@ -1977,7 +2007,7 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) ...@@ -1977,7 +2007,7 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
print_features(mvdev, features, true); print_features(mvdev, features, true);
err = verify_min_features(mvdev, features); err = verify_driver_features(mvdev, features);
if (err) if (err)
return err; return err;
......
...@@ -393,7 +393,7 @@ static void vdpa_get_config_unlocked(struct vdpa_device *vdev, ...@@ -393,7 +393,7 @@ static void vdpa_get_config_unlocked(struct vdpa_device *vdev,
* If it does happen we assume a legacy guest. * If it does happen we assume a legacy guest.
*/ */
if (!vdev->features_valid) if (!vdev->features_valid)
vdpa_set_features(vdev, 0, true); vdpa_set_features_unlocked(vdev, 0);
ops->get_config(vdev, offset, buf, len); ops->get_config(vdev, offset, buf, len);
} }
......
...@@ -294,7 +294,7 @@ vduse_domain_alloc_iova(struct iova_domain *iovad, ...@@ -294,7 +294,7 @@ vduse_domain_alloc_iova(struct iova_domain *iovad,
iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true); iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
return iova_pfn << shift; return (dma_addr_t)iova_pfn << shift;
} }
static void vduse_domain_free_iova(struct iova_domain *iovad, static void vduse_domain_free_iova(struct iova_domain *iovad,
......
...@@ -533,8 +533,8 @@ static void vp_vdpa_remove(struct pci_dev *pdev) ...@@ -533,8 +533,8 @@ static void vp_vdpa_remove(struct pci_dev *pdev)
{ {
struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev); struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev);
vdpa_unregister_device(&vp_vdpa->vdpa);
vp_modern_remove(&vp_vdpa->mdev); vp_modern_remove(&vp_vdpa->mdev);
vdpa_unregister_device(&vp_vdpa->vdpa);
} }
static struct pci_driver vp_vdpa_driver = { static struct pci_driver vp_vdpa_driver = {
......
...@@ -57,6 +57,17 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, ...@@ -57,6 +57,17 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb,
if (last < start) if (last < start)
return -EFAULT; return -EFAULT;
/* If the range being mapped is [0, ULONG_MAX], split it into two entries
* otherwise its size would overflow u64.
*/
if (start == 0 && last == ULONG_MAX) {
u64 mid = last / 2;
vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, perm, opaque);
addr += mid + 1;
start = mid + 1;
}
if (iotlb->limit && if (iotlb->limit &&
iotlb->nmaps == iotlb->limit && iotlb->nmaps == iotlb->limit &&
iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) { iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) {
......
...@@ -286,7 +286,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) ...@@ -286,7 +286,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
if (copy_from_user(&features, featurep, sizeof(features))) if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT; return -EFAULT;
if (vdpa_set_features(vdpa, features, false)) if (vdpa_set_features(vdpa, features))
return -EINVAL; return -EINVAL;
return 0; return 0;
......
...@@ -1170,6 +1170,11 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, ...@@ -1170,6 +1170,11 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
goto done; goto done;
} }
if (msg.size == 0) {
ret = -EINVAL;
goto done;
}
if (dev->msg_handler) if (dev->msg_handler)
ret = dev->msg_handler(dev, &msg); ret = dev->msg_handler(dev, &msg);
else else
...@@ -1981,7 +1986,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) ...@@ -1981,7 +1986,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
return 0; return 0;
} }
static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) static int vhost_update_avail_event(struct vhost_virtqueue *vq)
{ {
if (vhost_put_avail_event(vq)) if (vhost_put_avail_event(vq))
return -EFAULT; return -EFAULT;
...@@ -2527,7 +2532,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) ...@@ -2527,7 +2532,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
return false; return false;
} }
} else { } else {
r = vhost_update_avail_event(vq, vq->avail_idx); r = vhost_update_avail_event(vq);
if (r) { if (r) {
vq_err(vq, "Failed to update avail event index at %p: %d\n", vq_err(vq, "Failed to update avail event index at %p: %d\n",
vhost_avail_event(vq), r); vhost_avail_event(vq), r);
......
...@@ -105,7 +105,6 @@ config VIRTIO_BALLOON ...@@ -105,7 +105,6 @@ config VIRTIO_BALLOON
config VIRTIO_MEM config VIRTIO_MEM
tristate "Virtio mem driver" tristate "Virtio mem driver"
default m
depends on X86_64 depends on X86_64
depends on VIRTIO depends on VIRTIO
depends on MEMORY_HOTPLUG depends on MEMORY_HOTPLUG
......
...@@ -166,14 +166,13 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status) ...@@ -166,14 +166,13 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
} }
EXPORT_SYMBOL_GPL(virtio_add_status); EXPORT_SYMBOL_GPL(virtio_add_status);
int virtio_finalize_features(struct virtio_device *dev) /* Do some validation, then set FEATURES_OK */
static int virtio_features_ok(struct virtio_device *dev)
{ {
int ret = dev->config->finalize_features(dev);
unsigned status; unsigned status;
int ret;
might_sleep(); might_sleep();
if (ret)
return ret;
ret = arch_has_restricted_virtio_memory_access(); ret = arch_has_restricted_virtio_memory_access();
if (ret) { if (ret) {
...@@ -202,8 +201,23 @@ int virtio_finalize_features(struct virtio_device *dev) ...@@ -202,8 +201,23 @@ int virtio_finalize_features(struct virtio_device *dev)
} }
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(virtio_finalize_features);
/**
* virtio_reset_device - quiesce device for removal
* @dev: the device to reset
*
* Prevents device from sending interrupts and accessing memory.
*
* Generally used for cleanup during driver / device removal.
*
* Once this has been invoked, caller must ensure that
* virtqueue_notify / virtqueue_kick are not in progress.
*
* Note: this guarantees that vq callbacks are not in progress, however caller
* is responsible for preventing access from other contexts, such as a system
* call/workqueue/bh. Invoking virtio_break_device then flushing any such
* contexts is one way to handle that.
* */
void virtio_reset_device(struct virtio_device *dev) void virtio_reset_device(struct virtio_device *dev)
{ {
dev->config->reset(dev); dev->config->reset(dev);
...@@ -245,17 +259,6 @@ static int virtio_dev_probe(struct device *_d) ...@@ -245,17 +259,6 @@ static int virtio_dev_probe(struct device *_d)
driver_features_legacy = driver_features; driver_features_legacy = driver_features;
} }
/*
* Some devices detect legacy solely via F_VERSION_1. Write
* F_VERSION_1 to force LE config space accesses before FEATURES_OK for
* these when needed.
*/
if (drv->validate && !virtio_legacy_is_little_endian()
&& device_features & BIT_ULL(VIRTIO_F_VERSION_1)) {
dev->features = BIT_ULL(VIRTIO_F_VERSION_1);
dev->config->finalize_features(dev);
}
if (device_features & (1ULL << VIRTIO_F_VERSION_1)) if (device_features & (1ULL << VIRTIO_F_VERSION_1))
dev->features = driver_features & device_features; dev->features = driver_features & device_features;
else else
...@@ -266,13 +269,26 @@ static int virtio_dev_probe(struct device *_d) ...@@ -266,13 +269,26 @@ static int virtio_dev_probe(struct device *_d)
if (device_features & (1ULL << i)) if (device_features & (1ULL << i))
__virtio_set_bit(dev, i); __virtio_set_bit(dev, i);
err = dev->config->finalize_features(dev);
if (err)
goto err;
if (drv->validate) { if (drv->validate) {
u64 features = dev->features;
err = drv->validate(dev); err = drv->validate(dev);
if (err) if (err)
goto err; goto err;
/* Did validation change any features? Then write them again. */
if (features != dev->features) {
err = dev->config->finalize_features(dev);
if (err)
goto err;
}
} }
err = virtio_finalize_features(dev); err = virtio_features_ok(dev);
if (err) if (err)
goto err; goto err;
...@@ -496,7 +512,11 @@ int virtio_device_restore(struct virtio_device *dev) ...@@ -496,7 +512,11 @@ int virtio_device_restore(struct virtio_device *dev)
/* We have a driver! */ /* We have a driver! */
virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
ret = virtio_finalize_features(dev); ret = dev->config->finalize_features(dev);
if (ret)
goto err;
ret = virtio_features_ok(dev);
if (ret) if (ret)
goto err; goto err;
......
...@@ -317,7 +317,7 @@ static int virtio_vdpa_finalize_features(struct virtio_device *vdev) ...@@ -317,7 +317,7 @@ static int virtio_vdpa_finalize_features(struct virtio_device *vdev)
/* Give virtio_ring a chance to accept features. */ /* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev); vring_transport_features(vdev);
return vdpa_set_features(vdpa, vdev->features, false); return vdpa_set_features(vdpa, vdev->features);
} }
static const char *virtio_vdpa_bus_name(struct virtio_device *vdev) static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
......
...@@ -401,18 +401,24 @@ static inline int vdpa_reset(struct vdpa_device *vdev) ...@@ -401,18 +401,24 @@ static inline int vdpa_reset(struct vdpa_device *vdev)
return ret; return ret;
} }
static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked) static inline int vdpa_set_features_unlocked(struct vdpa_device *vdev, u64 features)
{ {
const struct vdpa_config_ops *ops = vdev->config; const struct vdpa_config_ops *ops = vdev->config;
int ret; int ret;
if (!locked)
mutex_lock(&vdev->cf_mutex);
vdev->features_valid = true; vdev->features_valid = true;
ret = ops->set_driver_features(vdev, features); ret = ops->set_driver_features(vdev, features);
if (!locked)
mutex_unlock(&vdev->cf_mutex); return ret;
}
static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
{
int ret;
mutex_lock(&vdev->cf_mutex);
ret = vdpa_set_features_unlocked(vdev, features);
mutex_unlock(&vdev->cf_mutex);
return ret; return ret;
} }
......
...@@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev); ...@@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev);
void virtio_break_device(struct virtio_device *dev); void virtio_break_device(struct virtio_device *dev);
void virtio_config_changed(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev);
int virtio_finalize_features(struct virtio_device *dev);
#ifdef CONFIG_PM_SLEEP #ifdef CONFIG_PM_SLEEP
int virtio_device_freeze(struct virtio_device *dev); int virtio_device_freeze(struct virtio_device *dev);
int virtio_device_restore(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev);
......
...@@ -64,8 +64,9 @@ struct virtio_shm_region { ...@@ -64,8 +64,9 @@ struct virtio_shm_region {
* Returns the first 64 feature bits (all we currently need). * Returns the first 64 feature bits (all we currently need).
* @finalize_features: confirm what device features we'll be using. * @finalize_features: confirm what device features we'll be using.
* vdev: the virtio_device * vdev: the virtio_device
* This gives the final feature bits for the device: it can change * This sends the driver feature bits to the device: it can change
* the dev->feature bits if it wants. * the dev->feature bits if it wants.
* Note: despite the name this can be called any number of times.
* Returns 0 on success or error status * Returns 0 on success or error status
* @bus_name: return the bus name associated with the device (optional) * @bus_name: return the bus name associated with the device (optional)
* vdev: the virtio_device * vdev: the virtio_device
......
struct folio {
struct page page;
};
...@@ -130,6 +130,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features) ...@@ -130,6 +130,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
memset(dev, 0, sizeof *dev); memset(dev, 0, sizeof *dev);
dev->vdev.features = features; dev->vdev.features = features;
INIT_LIST_HEAD(&dev->vdev.vqs); INIT_LIST_HEAD(&dev->vdev.vqs);
spin_lock_init(&dev->vdev.vqs_list_lock);
dev->buf_size = 1024; dev->buf_size = 1024;
dev->buf = malloc(dev->buf_size); dev->buf = malloc(dev->buf_size);
assert(dev->buf); assert(dev->buf);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment