Commit 1eb8df18 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio,vhost,vdpa updates from Michael Tsirkin:

 - Doorbell remapping for ifcvf, mlx5

 - virtio_vdpa support for mlx5

 - Validate device input in several drivers (for SEV and friends)

 - ZONE_MOVABLE aware handling in virtio-mem

 - Misc fixes, cleanups

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (48 commits)
  virtio-mem: prioritize unplug from ZONE_MOVABLE in Big Block Mode
  virtio-mem: simplify high-level unplug handling in Big Block Mode
  virtio-mem: prioritize unplug from ZONE_MOVABLE in Sub Block Mode
  virtio-mem: simplify high-level unplug handling in Sub Block Mode
  virtio-mem: simplify high-level plug handling in Sub Block Mode
  virtio-mem: use page_zonenum() in virtio_mem_fake_offline()
  virtio-mem: don't read big block size in Sub Block Mode
  virtio/vdpa: clear the virtqueue state during probe
  vp_vdpa: allow set vq state to initial state after reset
  virtio-pci library: introduce vp_modern_get_driver_features()
  vdpa: support packed virtqueue for set/get_vq_state()
  virtio-ring: store DMA metadata in desc_extra for split virtqueue
  virtio: use err label in __vring_new_virtqueue()
  virtio_ring: introduce virtqueue_desc_add_split()
  virtio_ring: secure handling of mapping errors
  virtio-ring: factor out desc_extra allocation
  virtio_ring: rename vring_desc_extra_packed
  virtio-ring: maintain next in extra state for packed virtqueue
  vdpa/mlx5: Clear vq ready indication upon device reset
  vdpa/mlx5: Add support for doorbell bypassing
  ...
parents d8dc121e db7b3377
...@@ -21,6 +21,9 @@ ...@@ -21,6 +21,9 @@
#define VQ_NAME_LEN 16 #define VQ_NAME_LEN 16
#define MAX_DISCARD_SEGMENTS 256u #define MAX_DISCARD_SEGMENTS 256u
/* The maximum number of sg elements that fit into a virtqueue */
#define VIRTIO_BLK_MAX_SG_ELEMS 32768
static int major; static int major;
static DEFINE_IDA(vd_index_ida); static DEFINE_IDA(vd_index_ida);
...@@ -447,13 +450,6 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) ...@@ -447,13 +450,6 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
/* Host must always specify the capacity. */ /* Host must always specify the capacity. */
virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);
/* If capacity is too big, truncate with warning. */
if ((sector_t)capacity != capacity) {
dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
(unsigned long long)capacity);
capacity = (sector_t)-1;
}
nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);
string_get_size(nblocks, queue_logical_block_size(q), string_get_size(nblocks, queue_logical_block_size(q),
...@@ -728,7 +724,10 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -728,7 +724,10 @@ static int virtblk_probe(struct virtio_device *vdev)
if (err || !sg_elems) if (err || !sg_elems)
sg_elems = 1; sg_elems = 1;
/* We need an extra sg elements at head and tail. */ /* Prevent integer overflows and honor max vq size */
sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);
/* We need extra sg elements at head and tail. */
sg_elems += 2; sg_elems += 2;
vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
if (!vblk) { if (!vblk) {
...@@ -936,6 +935,8 @@ static int virtblk_freeze(struct virtio_device *vdev) ...@@ -936,6 +935,8 @@ static int virtblk_freeze(struct virtio_device *vdev)
blk_mq_quiesce_queue(vblk->disk->queue); blk_mq_quiesce_queue(vblk->disk->queue);
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
kfree(vblk->vqs);
return 0; return 0;
} }
......
...@@ -475,7 +475,7 @@ static struct port_buffer *get_inbuf(struct port *port) ...@@ -475,7 +475,7 @@ static struct port_buffer *get_inbuf(struct port *port)
buf = virtqueue_get_buf(port->in_vq, &len); buf = virtqueue_get_buf(port->in_vq, &len);
if (buf) { if (buf) {
buf->len = len; buf->len = min_t(size_t, len, buf->size);
buf->offset = 0; buf->offset = 0;
port->stats.bytes_received += len; port->stats.bytes_received += len;
} }
...@@ -1709,7 +1709,7 @@ static void control_work_handler(struct work_struct *work) ...@@ -1709,7 +1709,7 @@ static void control_work_handler(struct work_struct *work)
while ((buf = virtqueue_get_buf(vq, &len))) { while ((buf = virtqueue_get_buf(vq, &len))) {
spin_unlock(&portdev->c_ivq_lock); spin_unlock(&portdev->c_ivq_lock);
buf->len = len; buf->len = min_t(size_t, len, buf->size);
buf->offset = 0; buf->offset = 0;
handle_control_message(vq->vdev, portdev, buf); handle_control_message(vq->vdev, portdev, buf);
......
...@@ -1516,12 +1516,16 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) ...@@ -1516,12 +1516,16 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
return; return;
if (__netif_tx_trylock(txq)) { if (__netif_tx_trylock(txq)) {
free_old_xmit_skbs(sq, true); do {
virtqueue_disable_cb(sq->vq);
free_old_xmit_skbs(sq, true);
} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);
__netif_tx_unlock(txq); __netif_tx_unlock(txq);
} }
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);
} }
static int virtnet_poll(struct napi_struct *napi, int budget) static int virtnet_poll(struct napi_struct *napi, int budget)
...@@ -1592,6 +1596,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) ...@@ -1592,6 +1596,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
struct virtnet_info *vi = sq->vq->vdev->priv; struct virtnet_info *vi = sq->vq->vdev->priv;
unsigned int index = vq2txq(sq->vq); unsigned int index = vq2txq(sq->vq);
struct netdev_queue *txq; struct netdev_queue *txq;
int opaque;
bool done;
if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
/* We don't need to enable cb for XDP */ /* We don't need to enable cb for XDP */
...@@ -1601,14 +1607,32 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) ...@@ -1601,14 +1607,32 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
txq = netdev_get_tx_queue(vi->dev, index); txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id()); __netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
free_old_xmit_skbs(sq, true); free_old_xmit_skbs(sq, true);
__netif_tx_unlock(txq);
virtqueue_napi_complete(napi, sq->vq, 0);
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq); netif_tx_wake_queue(txq);
opaque = virtqueue_enable_cb_prepare(sq->vq);
done = napi_complete_done(napi, 0);
if (!done)
virtqueue_disable_cb(sq->vq);
__netif_tx_unlock(txq);
if (done) {
if (unlikely(virtqueue_poll(sq->vq, opaque))) {
if (napi_schedule_prep(napi)) {
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
__netif_tx_unlock(txq);
__napi_schedule(napi);
}
}
}
return 0; return 0;
} }
...@@ -1670,10 +1694,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -1670,10 +1694,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
bool use_napi = sq->napi.weight; bool use_napi = sq->napi.weight;
/* Free up any pending old buffers before queueing new ones. */ /* Free up any pending old buffers before queueing new ones. */
free_old_xmit_skbs(sq, false); do {
if (use_napi)
virtqueue_disable_cb(sq->vq);
free_old_xmit_skbs(sq, false);
if (use_napi && kick) } while (use_napi && kick &&
virtqueue_enable_cb_delayed(sq->vq); unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
/* timestamp packet in software */ /* timestamp packet in software */
skb_tx_timestamp(skb); skb_tx_timestamp(skb);
...@@ -3310,8 +3338,11 @@ static __maybe_unused int virtnet_restore(struct virtio_device *vdev) ...@@ -3310,8 +3338,11 @@ static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
virtnet_set_queues(vi, vi->curr_queue_pairs); virtnet_set_queues(vi, vi->curr_queue_pairs);
err = virtnet_cpu_notif_add(vi); err = virtnet_cpu_notif_add(vi);
if (err) if (err) {
virtnet_freeze_down(vdev);
remove_vq_common(vi);
return err; return err;
}
return 0; return 0;
} }
......
...@@ -133,6 +133,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) ...@@ -133,6 +133,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
&hw->notify_off_multiplier); &hw->notify_off_multiplier);
hw->notify_bar = cap.bar; hw->notify_bar = cap.bar;
hw->notify_base = get_cap_addr(hw, &cap); hw->notify_base = get_cap_addr(hw, &cap);
hw->notify_base_pa = pci_resource_start(pdev, cap.bar) +
le32_to_cpu(cap.offset);
IFCVF_DBG(pdev, "hw->notify_base = %p\n", IFCVF_DBG(pdev, "hw->notify_base = %p\n",
hw->notify_base); hw->notify_base);
break; break;
...@@ -161,6 +163,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) ...@@ -161,6 +163,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off); notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off);
hw->vring[i].notify_addr = hw->notify_base + hw->vring[i].notify_addr = hw->notify_base +
notify_off * hw->notify_off_multiplier; notify_off * hw->notify_off_multiplier;
hw->vring[i].notify_pa = hw->notify_base_pa +
notify_off * hw->notify_off_multiplier;
} }
hw->lm_cfg = hw->base[IFCVF_LM_BAR]; hw->lm_cfg = hw->base[IFCVF_LM_BAR];
......
...@@ -19,21 +19,9 @@ ...@@ -19,21 +19,9 @@
#include <uapi/linux/virtio_config.h> #include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_pci.h> #include <uapi/linux/virtio_pci.h>
#define N3000_VENDOR_ID 0x1AF4
#define N3000_DEVICE_ID 0x1041 #define N3000_DEVICE_ID 0x1041
#define N3000_SUBSYS_VENDOR_ID 0x8086
#define N3000_SUBSYS_DEVICE_ID 0x001A #define N3000_SUBSYS_DEVICE_ID 0x001A
#define C5000X_PL_VENDOR_ID 0x1AF4
#define C5000X_PL_DEVICE_ID 0x1000
#define C5000X_PL_SUBSYS_VENDOR_ID 0x8086
#define C5000X_PL_SUBSYS_DEVICE_ID 0x0001
#define C5000X_PL_BLK_VENDOR_ID 0x1AF4
#define C5000X_PL_BLK_DEVICE_ID 0x1001
#define C5000X_PL_BLK_SUBSYS_VENDOR_ID 0x8086
#define C5000X_PL_BLK_SUBSYS_DEVICE_ID 0x0002
#define IFCVF_NET_SUPPORTED_FEATURES \ #define IFCVF_NET_SUPPORTED_FEATURES \
((1ULL << VIRTIO_NET_F_MAC) | \ ((1ULL << VIRTIO_NET_F_MAC) | \
(1ULL << VIRTIO_F_ANY_LAYOUT) | \ (1ULL << VIRTIO_F_ANY_LAYOUT) | \
...@@ -73,6 +61,7 @@ struct vring_info { ...@@ -73,6 +61,7 @@ struct vring_info {
u16 last_avail_idx; u16 last_avail_idx;
bool ready; bool ready;
void __iomem *notify_addr; void __iomem *notify_addr;
phys_addr_t notify_pa;
u32 irq; u32 irq;
struct vdpa_callback cb; struct vdpa_callback cb;
char msix_name[256]; char msix_name[256];
...@@ -87,6 +76,7 @@ struct ifcvf_hw { ...@@ -87,6 +76,7 @@ struct ifcvf_hw {
u8 notify_bar; u8 notify_bar;
/* Notificaiton bar address */ /* Notificaiton bar address */
void __iomem *notify_base; void __iomem *notify_base;
phys_addr_t notify_base_pa;
u32 notify_off_multiplier; u32 notify_off_multiplier;
u64 req_features; u64 req_features;
u64 hw_features; u64 hw_features;
......
...@@ -264,7 +264,7 @@ static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, ...@@ -264,7 +264,7 @@ static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
{ {
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
state->avail_index = ifcvf_get_vq_state(vf, qid); state->split.avail_index = ifcvf_get_vq_state(vf, qid);
return 0; return 0;
} }
...@@ -273,7 +273,7 @@ static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid, ...@@ -273,7 +273,7 @@ static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid,
{ {
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
return ifcvf_set_vq_state(vf, qid, state->avail_index); return ifcvf_set_vq_state(vf, qid, state->split.avail_index);
} }
static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid,
...@@ -413,6 +413,21 @@ static int ifcvf_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev, ...@@ -413,6 +413,21 @@ static int ifcvf_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev,
return vf->vring[qid].irq; return vf->vring[qid].irq;
} }
static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_device *vdpa_dev,
u16 idx)
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
struct vdpa_notification_area area;
area.addr = vf->vring[idx].notify_pa;
if (!vf->notify_off_multiplier)
area.size = PAGE_SIZE;
else
area.size = vf->notify_off_multiplier;
return area;
}
/* /*
* IFCVF currently does't have on-chip IOMMU, so not * IFCVF currently does't have on-chip IOMMU, so not
* implemented set_map()/dma_map()/dma_unmap() * implemented set_map()/dma_map()/dma_unmap()
...@@ -440,6 +455,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { ...@@ -440,6 +455,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = {
.get_config = ifcvf_vdpa_get_config, .get_config = ifcvf_vdpa_get_config,
.set_config = ifcvf_vdpa_set_config, .set_config = ifcvf_vdpa_set_config,
.set_config_cb = ifcvf_vdpa_set_config_cb, .set_config_cb = ifcvf_vdpa_set_config_cb,
.get_vq_notification = ifcvf_get_vq_notification,
}; };
static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
...@@ -536,18 +552,21 @@ static void ifcvf_remove(struct pci_dev *pdev) ...@@ -536,18 +552,21 @@ static void ifcvf_remove(struct pci_dev *pdev)
} }
static struct pci_device_id ifcvf_pci_ids[] = { static struct pci_device_id ifcvf_pci_ids[] = {
{ PCI_DEVICE_SUB(N3000_VENDOR_ID, /* N3000 network device */
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
N3000_DEVICE_ID, N3000_DEVICE_ID,
N3000_SUBSYS_VENDOR_ID, PCI_VENDOR_ID_INTEL,
N3000_SUBSYS_DEVICE_ID) }, N3000_SUBSYS_DEVICE_ID) },
{ PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID, /* C5000X-PL network device */
C5000X_PL_DEVICE_ID, { PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
C5000X_PL_SUBSYS_VENDOR_ID, VIRTIO_TRANS_ID_NET,
C5000X_PL_SUBSYS_DEVICE_ID) }, PCI_VENDOR_ID_INTEL,
{ PCI_DEVICE_SUB(C5000X_PL_BLK_VENDOR_ID, VIRTIO_ID_NET) },
C5000X_PL_BLK_DEVICE_ID, /* C5000X-PL block device */
C5000X_PL_BLK_SUBSYS_VENDOR_ID, { PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
C5000X_PL_BLK_SUBSYS_DEVICE_ID) }, VIRTIO_TRANS_ID_BLOCK,
PCI_VENDOR_ID_INTEL,
VIRTIO_ID_BLOCK) },
{ 0 }, { 0 },
}; };
......
...@@ -35,12 +35,14 @@ struct mlx5_vdpa_mr { ...@@ -35,12 +35,14 @@ struct mlx5_vdpa_mr {
/* serialize mkey creation and destruction */ /* serialize mkey creation and destruction */
struct mutex mkey_mtx; struct mutex mkey_mtx;
bool user_mr;
}; };
struct mlx5_vdpa_resources { struct mlx5_vdpa_resources {
u32 pdn; u32 pdn;
struct mlx5_uars_page *uar; struct mlx5_uars_page *uar;
void __iomem *kick_addr; void __iomem *kick_addr;
u64 phys_kick_addr;
u16 uid; u16 uid;
u32 null_mkey; u32 null_mkey;
bool valid; bool valid;
......
...@@ -219,11 +219,6 @@ static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_m ...@@ -219,11 +219,6 @@ static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_m
mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey); mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
} }
static struct device *get_dma_device(struct mlx5_vdpa_dev *mvdev)
{
return &mvdev->mdev->pdev->dev;
}
static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr, static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
struct vhost_iotlb *iotlb) struct vhost_iotlb *iotlb)
{ {
...@@ -239,7 +234,7 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr ...@@ -239,7 +234,7 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
u64 pa; u64 pa;
u64 paend; u64 paend;
struct scatterlist *sg; struct scatterlist *sg;
struct device *dma = get_dma_device(mvdev); struct device *dma = mvdev->vdev.dma_dev;
for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
...@@ -298,7 +293,7 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr ...@@ -298,7 +293,7 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{ {
struct device *dma = get_dma_device(mvdev); struct device *dma = mvdev->vdev.dma_dev;
destroy_direct_mr(mvdev, mr); destroy_direct_mr(mvdev, mr);
dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
...@@ -360,7 +355,7 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 ...@@ -360,7 +355,7 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8
* indirect memory key that provides access to the enitre address space given * indirect memory key that provides access to the enitre address space given
* by iotlb. * by iotlb.
*/ */
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{ {
struct mlx5_vdpa_mr *mr = &mvdev->mr; struct mlx5_vdpa_mr *mr = &mvdev->mr;
struct mlx5_vdpa_direct_mr *dmr; struct mlx5_vdpa_direct_mr *dmr;
...@@ -374,9 +369,6 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb ...@@ -374,9 +369,6 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
int err = 0; int err = 0;
int nnuls; int nnuls;
if (mr->initialized)
return 0;
INIT_LIST_HEAD(&mr->head); INIT_LIST_HEAD(&mr->head);
for (map = vhost_iotlb_itree_first(iotlb, start, last); map; for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
map = vhost_iotlb_itree_next(map, start, last)) { map = vhost_iotlb_itree_next(map, start, last)) {
...@@ -414,7 +406,7 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb ...@@ -414,7 +406,7 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
if (err) if (err)
goto err_chain; goto err_chain;
mr->initialized = true; mr->user_mr = true;
return 0; return 0;
err_chain: err_chain:
...@@ -426,33 +418,94 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb ...@@ -426,33 +418,94 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
return err; return err;
} }
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
void *mkc;
u32 *in;
int err;
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
MLX5_SET(mkc, mkc, length64, 1);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
if (!err)
mr->user_mr = false;
kfree(in);
return err;
}
static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
}
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{ {
struct mlx5_vdpa_mr *mr = &mvdev->mr; struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err; int err;
mutex_lock(&mr->mkey_mtx); if (mr->initialized)
return 0;
if (iotlb)
err = create_user_mr(mvdev, iotlb);
else
err = create_dma_mr(mvdev, mr);
if (!err)
mr->initialized = true;
return err;
}
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
int err;
mutex_lock(&mvdev->mr.mkey_mtx);
err = _mlx5_vdpa_create_mr(mvdev, iotlb); err = _mlx5_vdpa_create_mr(mvdev, iotlb);
mutex_unlock(&mr->mkey_mtx); mutex_unlock(&mvdev->mr.mkey_mtx);
return err; return err;
} }
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{ {
struct mlx5_vdpa_mr *mr = &mvdev->mr;
struct mlx5_vdpa_direct_mr *dmr; struct mlx5_vdpa_direct_mr *dmr;
struct mlx5_vdpa_direct_mr *n; struct mlx5_vdpa_direct_mr *n;
mutex_lock(&mr->mkey_mtx);
if (!mr->initialized)
goto out;
destroy_indirect_key(mvdev, mr); destroy_indirect_key(mvdev, mr);
list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
list_del_init(&dmr->list); list_del_init(&dmr->list);
unmap_direct_mr(mvdev, dmr); unmap_direct_mr(mvdev, dmr);
kfree(dmr); kfree(dmr);
} }
}
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
mutex_lock(&mr->mkey_mtx);
if (!mr->initialized)
goto out;
if (mr->user_mr)
destroy_user_mr(mvdev, mr);
else
destroy_dma_mr(mvdev, mr);
memset(mr, 0, sizeof(*mr)); memset(mr, 0, sizeof(*mr));
mr->initialized = false; mr->initialized = false;
out: out:
......
...@@ -54,6 +54,9 @@ static int create_uctx(struct mlx5_vdpa_dev *mvdev, u16 *uid) ...@@ -54,6 +54,9 @@ static int create_uctx(struct mlx5_vdpa_dev *mvdev, u16 *uid)
void *in; void *in;
int err; int err;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0))
return 0;
/* 0 means not supported */ /* 0 means not supported */
if (!MLX5_CAP_GEN(mvdev->mdev, log_max_uctx)) if (!MLX5_CAP_GEN(mvdev->mdev, log_max_uctx))
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -79,6 +82,9 @@ static void destroy_uctx(struct mlx5_vdpa_dev *mvdev, u32 uid) ...@@ -79,6 +82,9 @@ static void destroy_uctx(struct mlx5_vdpa_dev *mvdev, u32 uid)
u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {}; u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {};
u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
if (!uid)
return;
MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
MLX5_SET(destroy_uctx_in, in, uid, uid); MLX5_SET(destroy_uctx_in, in, uid, uid);
...@@ -247,6 +253,7 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) ...@@ -247,6 +253,7 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
goto err_key; goto err_key;
kick_addr = mdev->bar_addr + offset; kick_addr = mdev->bar_addr + offset;
res->phys_kick_addr = kick_addr;
res->kick_addr = ioremap(kick_addr, PAGE_SIZE); res->kick_addr = ioremap(kick_addr, PAGE_SIZE);
if (!res->kick_addr) { if (!res->kick_addr) {
......
...@@ -611,8 +611,8 @@ static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) ...@@ -611,8 +611,8 @@ static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
mlx5_db_free(ndev->mvdev.mdev, &vcq->db); mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
} }
static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
struct mlx5_vdpa_umem **umemp) struct mlx5_vdpa_umem **umemp)
{ {
struct mlx5_core_dev *mdev = ndev->mvdev.mdev; struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
int p_a; int p_a;
...@@ -635,7 +635,7 @@ static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq ...@@ -635,7 +635,7 @@ static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq
*umemp = &mvq->umem3; *umemp = &mvq->umem3;
break; break;
} }
return p_a * mvq->num_ent + p_b; (*umemp)->size = p_a * mvq->num_ent + p_b;
} }
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
...@@ -651,15 +651,10 @@ static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m ...@@ -651,15 +651,10 @@ static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m
void *in; void *in;
int err; int err;
__be64 *pas; __be64 *pas;
int size;
struct mlx5_vdpa_umem *umem; struct mlx5_vdpa_umem *umem;
size = umem_size(ndev, mvq, num, &umem); set_umem_size(ndev, mvq, num, &umem);
if (size < 0) err = umem_frag_buf_alloc(ndev, umem, umem->size);
return size;
umem->size = size;
err = umem_frag_buf_alloc(ndev, umem, size);
if (err) if (err)
return err; return err;
...@@ -829,9 +824,9 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque ...@@ -829,9 +824,9 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem1.size); MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem1.size); MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type)) if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1); MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
...@@ -1428,8 +1423,8 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, ...@@ -1428,8 +1423,8 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
return -EINVAL; return -EINVAL;
} }
mvq->used_idx = state->avail_index; mvq->used_idx = state->split.avail_index;
mvq->avail_idx = state->avail_index; mvq->avail_idx = state->split.avail_index;
return 0; return 0;
} }
...@@ -1450,7 +1445,7 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa ...@@ -1450,7 +1445,7 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
* Since both values should be identical, we take the value of * Since both values should be identical, we take the value of
* used_idx which is reported correctly. * used_idx which is reported correctly.
*/ */
state->avail_index = mvq->used_idx; state->split.avail_index = mvq->used_idx;
return 0; return 0;
} }
...@@ -1459,7 +1454,7 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa ...@@ -1459,7 +1454,7 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
return err; return err;
} }
state->avail_index = attr.used_index; state->split.avail_index = attr.used_index;
return 0; return 0;
} }
...@@ -1772,6 +1767,14 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev) ...@@ -1772,6 +1767,14 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev)
mutex_unlock(&ndev->reslock); mutex_unlock(&ndev->reslock);
} }
static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
{
int i;
for (i = 0; i < ndev->mvdev.max_vqs; i++)
ndev->vqs[i].ready = false;
}
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{ {
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
...@@ -1782,10 +1785,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) ...@@ -1782,10 +1785,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
if (!status) { if (!status) {
mlx5_vdpa_info(mvdev, "performing device reset\n"); mlx5_vdpa_info(mvdev, "performing device reset\n");
teardown_driver(ndev); teardown_driver(ndev);
clear_vqs_ready(ndev);
mlx5_vdpa_destroy_mr(&ndev->mvdev); mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status = 0; ndev->mvdev.status = 0;
ndev->mvdev.mlx_features = 0; ndev->mvdev.mlx_features = 0;
++mvdev->generation; ++mvdev->generation;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
if (mlx5_vdpa_create_mr(mvdev, NULL))
mlx5_vdpa_warn(mvdev, "create MR failed\n");
}
return; return;
} }
...@@ -1866,6 +1874,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) ...@@ -1866,6 +1874,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
ndev = to_mlx5_vdpa_ndev(mvdev); ndev = to_mlx5_vdpa_ndev(mvdev);
free_resources(ndev); free_resources(ndev);
mlx5_vdpa_destroy_mr(mvdev);
if (!is_zero_ether_addr(ndev->config.mac)) { if (!is_zero_ether_addr(ndev->config.mac)) {
pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
...@@ -1876,8 +1885,22 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) ...@@ -1876,8 +1885,22 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
{ {
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct vdpa_notification_area ret = {}; struct vdpa_notification_area ret = {};
struct mlx5_vdpa_net *ndev;
phys_addr_t addr;
/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
* notification to avoid the risk of mapping pages that contain BAR of more
* than one SF
*/
if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
return ret;
ndev = to_mlx5_vdpa_ndev(mvdev);
addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
ret.addr = addr;
ret.size = PAGE_SIZE;
return ret; return ret;
} }
...@@ -2037,14 +2060,20 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) ...@@ -2037,14 +2060,20 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
goto err_mtu; goto err_mtu;
} }
mvdev->vdev.dma_dev = mdev->device; mvdev->vdev.dma_dev = &mdev->pdev->dev;
err = mlx5_vdpa_alloc_resources(&ndev->mvdev); err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
if (err) if (err)
goto err_mpfs; goto err_mpfs;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
err = mlx5_vdpa_create_mr(mvdev, NULL);
if (err)
goto err_res;
}
err = alloc_resources(ndev); err = alloc_resources(ndev);
if (err) if (err)
goto err_res; goto err_mr;
mvdev->vdev.mdev = &mgtdev->mgtdev; mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs)); err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
...@@ -2056,6 +2085,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) ...@@ -2056,6 +2085,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
err_reg: err_reg:
free_resources(ndev); free_resources(ndev);
err_mr:
mlx5_vdpa_destroy_mr(mvdev);
err_res: err_res:
mlx5_vdpa_free_resources(&ndev->mvdev); mlx5_vdpa_free_resources(&ndev->mvdev);
err_mpfs: err_mpfs:
......
...@@ -374,7 +374,7 @@ static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, ...@@ -374,7 +374,7 @@ static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vringh *vrh = &vq->vring; struct vringh *vrh = &vq->vring;
spin_lock(&vdpasim->lock); spin_lock(&vdpasim->lock);
vrh->last_avail_idx = state->avail_index; vrh->last_avail_idx = state->split.avail_index;
spin_unlock(&vdpasim->lock); spin_unlock(&vdpasim->lock);
return 0; return 0;
...@@ -387,7 +387,7 @@ static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx, ...@@ -387,7 +387,7 @@ static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
struct vringh *vrh = &vq->vring; struct vringh *vrh = &vq->vring;
state->avail_index = vrh->last_avail_idx; state->split.avail_index = vrh->last_avail_idx;
return 0; return 0;
} }
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/vringh.h> #include <linux/vringh.h>
#include <linux/vdpa.h> #include <linux/vdpa.h>
#include <linux/blkdev.h>
#include <uapi/linux/virtio_blk.h> #include <uapi/linux/virtio_blk.h>
#include "vdpa_sim.h" #include "vdpa_sim.h"
......
...@@ -210,13 +210,49 @@ static int vp_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid, ...@@ -210,13 +210,49 @@ static int vp_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid,
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
static int vp_vdpa_set_vq_state_split(struct vdpa_device *vdpa,
				      const struct vdpa_vq_state *state)
{
	/* There is no way to program a split ring's state into the
	 * device; the only state we can accept is the all-zero initial
	 * one (avail_index == 0), since a freshly reset queue is
	 * already in that state.
	 */
	return state->split.avail_index ? -EOPNOTSUPP : 0;
}
static int vp_vdpa_set_vq_state_packed(struct vdpa_device *vdpa,
const struct vdpa_vq_state *state)
{
const struct vdpa_vq_state_packed *packed = &state->packed;
if (packed->last_avail_counter == 1 &&
packed->last_avail_idx == 0 &&
packed->last_used_counter == 1 &&
packed->last_used_idx == 0)
return 0;
return -EOPNOTSUPP;
}
static int vp_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid, static int vp_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid,
const struct vdpa_vq_state *state) const struct vdpa_vq_state *state)
{ {
/* Note that this is not supported by virtio specification, so struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
* we return -ENOPOTSUPP here. This means we can't support live
* migration, vhost device start/stop. /* Note that this is not supported by virtio specification.
* But if the state is by chance equal to the device initial
* state, we can let it go.
*/ */
if ((vp_modern_get_status(mdev) & VIRTIO_CONFIG_S_FEATURES_OK) &&
!vp_modern_get_queue_enable(mdev, qid)) {
if (vp_modern_get_driver_features(mdev) &
BIT_ULL(VIRTIO_F_RING_PACKED))
return vp_vdpa_set_vq_state_packed(vdpa, state);
else
return vp_vdpa_set_vq_state_split(vdpa, state);
}
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
...@@ -442,6 +478,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -442,6 +478,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vp_modern_map_vq_notify(mdev, i, vp_modern_map_vq_notify(mdev, i,
&vp_vdpa->vring[i].notify_pa); &vp_vdpa->vring[i].notify_pa);
if (!vp_vdpa->vring[i].notify) { if (!vp_vdpa->vring[i].notify) {
ret = -EINVAL;
dev_warn(&pdev->dev, "Fail to map vq notify %d\n", i); dev_warn(&pdev->dev, "Fail to map vq notify %d\n", i);
goto err; goto err;
} }
......
...@@ -83,7 +83,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, ...@@ -83,7 +83,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
EXPORT_SYMBOL_GPL(vhost_iotlb_add_range); EXPORT_SYMBOL_GPL(vhost_iotlb_add_range);
/** /**
* vring_iotlb_del_range - delete overlapped ranges from vhost IOTLB * vhost_iotlb_del_range - delete overlapped ranges from vhost IOTLB
* @iotlb: the IOTLB * @iotlb: the IOTLB
* @start: start of the IOVA range * @start: start of the IOVA range
* @last: last of IOVA range * @last: last of IOVA range
......
...@@ -1430,11 +1430,6 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) ...@@ -1430,11 +1430,6 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
vhost_scsi_handle_vq(vs, vq); vhost_scsi_handle_vq(vs, vq);
} }
static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
{
vhost_poll_flush(&vs->vqs[index].vq.poll);
}
/* Callers must hold dev mutex */ /* Callers must hold dev mutex */
static void vhost_scsi_flush(struct vhost_scsi *vs) static void vhost_scsi_flush(struct vhost_scsi *vs)
{ {
...@@ -1453,10 +1448,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs) ...@@ -1453,10 +1448,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight); kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight);
/* Flush both the vhost poll and vhost work */ /* Flush both the vhost poll and vhost work */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) vhost_work_dev_flush(&vs->dev);
vhost_scsi_flush_vq(vs, i);
vhost_work_flush(&vs->dev, &vs->vs_completion_work);
vhost_work_flush(&vs->dev, &vs->vs_event_work);
/* Wait for all reqs issued before the flush to be finished */ /* Wait for all reqs issued before the flush to be finished */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
...@@ -1740,11 +1732,12 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, ...@@ -1740,11 +1732,12 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, NULL); vhost_vq_set_backend(vq, NULL);
mutex_unlock(&vq->mutex); mutex_unlock(&vq->mutex);
/* }
* Make sure cmds are not running before tearing them /* Make sure cmds are not running before tearing them down. */
* down. vhost_scsi_flush(vs);
*/
vhost_scsi_flush(vs); for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
vq = &vs->vqs[i].vq;
vhost_scsi_destroy_vq_cmds(vq); vhost_scsi_destroy_vq_cmds(vq);
} }
} }
......
...@@ -383,7 +383,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, ...@@ -383,7 +383,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
if (r) if (r)
return r; return r;
vq->last_avail_idx = vq_state.avail_index; vq->last_avail_idx = vq_state.split.avail_index;
break; break;
} }
...@@ -401,7 +401,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, ...@@ -401,7 +401,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
break; break;
case VHOST_SET_VRING_BASE: case VHOST_SET_VRING_BASE:
vq_state.avail_index = vq->last_avail_idx; vq_state.split.avail_index = vq->last_avail_idx;
if (ops->set_vq_state(vdpa, idx, &vq_state)) if (ops->set_vq_state(vdpa, idx, &vq_state))
r = -EINVAL; r = -EINVAL;
break; break;
......
...@@ -231,7 +231,7 @@ void vhost_poll_stop(struct vhost_poll *poll) ...@@ -231,7 +231,7 @@ void vhost_poll_stop(struct vhost_poll *poll)
} }
EXPORT_SYMBOL_GPL(vhost_poll_stop); EXPORT_SYMBOL_GPL(vhost_poll_stop);
void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) void vhost_work_dev_flush(struct vhost_dev *dev)
{ {
struct vhost_flush_struct flush; struct vhost_flush_struct flush;
...@@ -243,13 +243,13 @@ void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) ...@@ -243,13 +243,13 @@ void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
wait_for_completion(&flush.wait_event); wait_for_completion(&flush.wait_event);
} }
} }
EXPORT_SYMBOL_GPL(vhost_work_flush); EXPORT_SYMBOL_GPL(vhost_work_dev_flush);
/* Flush any work that has been scheduled. When calling this, don't hold any /* Flush any work that has been scheduled. When calling this, don't hold any
* locks that are also used by the callback. */ * locks that are also used by the callback. */
void vhost_poll_flush(struct vhost_poll *poll) void vhost_poll_flush(struct vhost_poll *poll)
{ {
vhost_work_flush(poll->dev, &poll->work); vhost_work_dev_flush(poll->dev);
} }
EXPORT_SYMBOL_GPL(vhost_poll_flush); EXPORT_SYMBOL_GPL(vhost_poll_flush);
...@@ -538,7 +538,7 @@ static int vhost_attach_cgroups(struct vhost_dev *dev) ...@@ -538,7 +538,7 @@ static int vhost_attach_cgroups(struct vhost_dev *dev)
attach.owner = current; attach.owner = current;
vhost_work_init(&attach.work, vhost_attach_cgroups_work); vhost_work_init(&attach.work, vhost_attach_cgroups_work);
vhost_work_queue(dev, &attach.work); vhost_work_queue(dev, &attach.work);
vhost_work_flush(dev, &attach.work); vhost_work_dev_flush(dev);
return attach.ret; return attach.ret;
} }
......
...@@ -20,20 +20,20 @@ typedef void (*vhost_work_fn_t)(struct vhost_work *work); ...@@ -20,20 +20,20 @@ typedef void (*vhost_work_fn_t)(struct vhost_work *work);
#define VHOST_WORK_QUEUED 1 #define VHOST_WORK_QUEUED 1
struct vhost_work { struct vhost_work {
struct llist_node node; struct llist_node node;
vhost_work_fn_t fn; vhost_work_fn_t fn;
unsigned long flags; unsigned long flags;
}; };
/* Poll a file (eventfd or socket) */ /* Poll a file (eventfd or socket) */
/* Note: there's nothing vhost specific about this structure. */ /* Note: there's nothing vhost specific about this structure. */
struct vhost_poll { struct vhost_poll {
poll_table table; poll_table table;
wait_queue_head_t *wqh; wait_queue_head_t *wqh;
wait_queue_entry_t wait; wait_queue_entry_t wait;
struct vhost_work work; struct vhost_work work;
__poll_t mask; __poll_t mask;
struct vhost_dev *dev; struct vhost_dev *dev;
}; };
void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
...@@ -46,8 +46,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file); ...@@ -46,8 +46,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file);
void vhost_poll_stop(struct vhost_poll *poll); void vhost_poll_stop(struct vhost_poll *poll);
void vhost_poll_flush(struct vhost_poll *poll); void vhost_poll_flush(struct vhost_poll *poll);
void vhost_poll_queue(struct vhost_poll *poll); void vhost_poll_queue(struct vhost_poll *poll);
void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work); void vhost_work_dev_flush(struct vhost_dev *dev);
long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp);
struct vhost_log { struct vhost_log {
u64 addr; u64 addr;
......
...@@ -708,7 +708,7 @@ static void vhost_vsock_flush(struct vhost_vsock *vsock) ...@@ -708,7 +708,7 @@ static void vhost_vsock_flush(struct vhost_vsock *vsock)
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
if (vsock->vqs[i].handle_kick) if (vsock->vqs[i].handle_kick)
vhost_poll_flush(&vsock->vqs[i].poll); vhost_poll_flush(&vsock->vqs[i].poll);
vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); vhost_work_dev_flush(&vsock->dev);
} }
static void vhost_vsock_reset_orphans(struct sock *sk) static void vhost_vsock_reset_orphans(struct sock *sk)
......
This diff is collapsed.
...@@ -383,6 +383,27 @@ u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev) ...@@ -383,6 +383,27 @@ u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev)
} }
EXPORT_SYMBOL_GPL(vp_modern_get_features); EXPORT_SYMBOL_GPL(vp_modern_get_features);
/*
* vp_modern_get_driver_features - get driver features from device
* @mdev: the modern virtio-pci device
*
* Returns the driver features read from the device
*/
u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
u64 features;
vp_iowrite32(0, &cfg->guest_feature_select);
features = vp_ioread32(&cfg->guest_feature);
vp_iowrite32(1, &cfg->guest_feature_select);
features |= ((u64)vp_ioread32(&cfg->guest_feature) << 32);
return features;
}
EXPORT_SYMBOL_GPL(vp_modern_get_driver_features);
/* /*
* vp_modern_set_features - set features to device * vp_modern_set_features - set features to device
* @mdev: the modern virtio-pci device * @mdev: the modern virtio-pci device
......
This diff is collapsed.
...@@ -142,6 +142,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, ...@@ -142,6 +142,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
struct vdpa_callback cb; struct vdpa_callback cb;
struct virtqueue *vq; struct virtqueue *vq;
u64 desc_addr, driver_addr, device_addr; u64 desc_addr, driver_addr, device_addr;
/* Assume split virtqueue, switch to packed if necessary */
struct vdpa_vq_state state = {0};
unsigned long flags; unsigned long flags;
u32 align, num; u32 align, num;
int err; int err;
...@@ -191,6 +193,19 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, ...@@ -191,6 +193,19 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
goto err_vq; goto err_vq;
} }
/* reset virtqueue state index */
if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
struct vdpa_vq_state_packed *s = &state.packed;
s->last_avail_counter = 1;
s->last_avail_idx = 0;
s->last_used_counter = 1;
s->last_used_idx = 0;
}
err = ops->set_vq_state(vdpa, index, &state);
if (err)
goto err_vq;
ops->set_vq_ready(vdpa, index, 1); ops->set_vq_ready(vdpa, index, 1);
vq->priv = info; vq->priv = info;
......
...@@ -1512,7 +1512,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { ...@@ -1512,7 +1512,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 uar_4k[0x1]; u8 uar_4k[0x1];
u8 reserved_at_241[0x9]; u8 reserved_at_241[0x9];
u8 uar_sz[0x6]; u8 uar_sz[0x6];
u8 reserved_at_250[0x8]; u8 reserved_at_248[0x2];
u8 umem_uid_0[0x1];
u8 reserved_at_250[0x5];
u8 log_pg_sz[0x8]; u8 log_pg_sz[0x8];
u8 bf[0x1]; u8 bf[0x1];
......
...@@ -28,13 +28,34 @@ struct vdpa_notification_area { ...@@ -28,13 +28,34 @@ struct vdpa_notification_area {
}; };
/** /**
* struct vdpa_vq_state - vDPA vq_state definition * struct vdpa_vq_state_split - vDPA split virtqueue state
* @avail_index: available index * @avail_index: available index
*/ */
struct vdpa_vq_state { struct vdpa_vq_state_split {
u16 avail_index; u16 avail_index;
}; };
/**
 * struct vdpa_vq_state_packed - vDPA packed virtqueue state
 * @last_avail_counter: last driver ring wrap counter observed by device
 * @last_avail_idx: device available index
 * @last_used_counter: device ring wrap counter
 * @last_used_idx: used index
 *
 * NOTE: the four bitfields pack into a pair of 16-bit words matching
 * the on-the-wire idx/wrap-counter encoding of the packed ring; do not
 * reorder them.
 */
struct vdpa_vq_state_packed {
u16 last_avail_counter:1;
u16 last_avail_idx:15;
u16 last_used_counter:1;
u16 last_used_idx:15;
};
/**
 * struct vdpa_vq_state - vDPA virtqueue state
 * @split: state of a split virtqueue ring
 * @packed: state of a packed virtqueue ring
 *
 * Only one union member is meaningful for a given queue: callers pick
 * @packed when VIRTIO_F_RING_PACKED was negotiated, @split otherwise.
 */
struct vdpa_vq_state {
union {
struct vdpa_vq_state_split split;
struct vdpa_vq_state_packed packed;
};
};
struct vdpa_mgmt_dev; struct vdpa_mgmt_dev;
/** /**
......
...@@ -79,6 +79,7 @@ static inline void vp_iowrite64_twopart(u64 val, ...@@ -79,6 +79,7 @@ static inline void vp_iowrite64_twopart(u64 val,
} }
u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev); u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev);
u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev);
void vp_modern_set_features(struct virtio_pci_modern_device *mdev, void vp_modern_set_features(struct virtio_pci_modern_device *mdev,
u64 features); u64 features);
u32 vp_modern_generation(struct virtio_pci_modern_device *mdev); u32 vp_modern_generation(struct virtio_pci_modern_device *mdev);
......
...@@ -57,4 +57,16 @@ ...@@ -57,4 +57,16 @@
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
#define VIRTIO_ID_BT 40 /* virtio bluetooth */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */
/*
 * Virtio Transitional IDs
 *
 * These mirror the PCI device IDs of transitional virtio-pci devices
 * (0x1000 + device class); they must therefore be hexadecimal.
 * Defining them as decimal 1000-1009 produced values that match no
 * real device ID.
 */
#define VIRTIO_TRANS_ID_NET		0x1000 /* transitional virtio net */
#define VIRTIO_TRANS_ID_BLOCK		0x1001 /* transitional virtio block */
#define VIRTIO_TRANS_ID_BALLOON		0x1002 /* transitional virtio balloon */
#define VIRTIO_TRANS_ID_CONSOLE		0x1003 /* transitional virtio console */
#define VIRTIO_TRANS_ID_SCSI		0x1004 /* transitional virtio SCSI */
#define VIRTIO_TRANS_ID_RNG		0x1005 /* transitional virtio rng */
#define VIRTIO_TRANS_ID_9P		0x1009 /* transitional virtio 9p */
#endif /* _LINUX_VIRTIO_IDS_H */ #endif /* _LINUX_VIRTIO_IDS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment