Commit f0691533 authored by Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio/vhost updates from Michael Tsirkin:
 "New features, performance improvements, cleanups:

   - basic polling support for vhost
   - rework virtio to optionally use DMA API, fixing it on Xen
   - balloon stats gained a new entry
   - using the new napi_alloc_skb speeds up virtio net
   - virtio blk stats can now be read while another VCPU is busy
     inflating or deflating the balloon

  plus misc cleanups in various places"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  virtio_net: replace netdev_alloc_skb_ip_align() with napi_alloc_skb()
  vhost_net: basic polling support
  vhost: introduce vhost_vq_avail_empty()
  vhost: introduce vhost_has_work()
  virtio_balloon: Allow to resize and update the balloon stats in parallel
  virtio_balloon: Use a workqueue instead of "vballoon" kthread
  virtio/s390: size of SET_IND payload
  virtio/s390: use dev_to_virtio
  vhost: rename vhost_init_used()
  vhost: rename cross-endian helpers
  virtio_blk: VIRTIO_BLK_F_WCE->VIRTIO_BLK_F_FLUSH
  vring: Use the DMA API on Xen
  virtio_pci: Use the DMA API if enabled
  virtio_mmio: Use the DMA API if enabled
  virtio: Add improved queue allocation API
  virtio_ring: Support DMA APIs
  vring: Introduce vring_use_dma_api()
  s390/dma: Allow per device dma ops
  alpha/dma: use common noop dma ops
  dma: Provide simple noop dma ops
parents 2b2f72d8 c67f5db8
...@@ -123,44 +123,6 @@ static void *alpha_noop_alloc_coherent(struct device *dev, size_t size, ...@@ -123,44 +123,6 @@ static void *alpha_noop_alloc_coherent(struct device *dev, size_t size,
return ret; return ret;
} }
/*
 * .free hook of the alpha no-op DMA ops.
 *
 * Hands the buffer at @cpu_addr back to the page allocator, using the
 * allocation order that get_order() derives from @size.  @dev, @dma_addr
 * and @attrs are accepted for the ops signature but not consulted.
 */
static void alpha_noop_free_coherent(struct device *dev, size_t size,
				     void *cpu_addr, dma_addr_t dma_addr,
				     struct dma_attrs *attrs)
{
	unsigned long addr = (unsigned long)cpu_addr;
	unsigned int order = get_order(size);

	free_pages(addr, order);
}
/*
 * .map_page hook of the alpha no-op DMA ops.
 *
 * No translation is performed: the returned bus address is simply
 * page_to_pa(@page) plus @offset.  @dev, @size, @dir and @attrs are
 * ignored.
 */
static dma_addr_t alpha_noop_map_page(struct device *dev, struct page *page,
				      unsigned long offset, size_t size,
				      enum dma_data_direction dir,
				      struct dma_attrs *attrs)
{
	unsigned long page_base = page_to_pa(page);

	return page_base + offset;
}
/*
 * .map_sg hook of the alpha no-op DMA ops.
 *
 * Walks the @nents entries of @sgl and, for each one, stores the physical
 * address of its buffer as the DMA address and its length as the DMA
 * length.  An entry without a backing page triggers BUG_ON().
 *
 * Returns @nents unchanged; @dev, @dir and @attrs are ignored.
 */
static int alpha_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
			     enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct scatterlist *entry;
	int idx;

	for_each_sg(sgl, entry, nents, idx) {
		BUG_ON(!sg_page(entry));

		sg_dma_address(entry) = (dma_addr_t)virt_to_phys(sg_virt(entry));
		sg_dma_len(entry) = entry->length;
	}

	return nents;
}
/*
 * .mapping_error hook of the alpha no-op DMA ops.
 *
 * Always returns 0 regardless of @dev and @dma_addr (presumably meaning
 * "no mapping error" to the caller -- confirm against the dma_map_ops
 * contract).
 */
static int alpha_noop_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return 0;
}
static int alpha_noop_supported(struct device *dev, u64 mask) static int alpha_noop_supported(struct device *dev, u64 mask)
{ {
return mask < 0x00ffffffUL ? 0 : 1; return mask < 0x00ffffffUL ? 0 : 1;
...@@ -168,10 +130,10 @@ static int alpha_noop_supported(struct device *dev, u64 mask) ...@@ -168,10 +130,10 @@ static int alpha_noop_supported(struct device *dev, u64 mask)
struct dma_map_ops alpha_noop_ops = { struct dma_map_ops alpha_noop_ops = {
.alloc = alpha_noop_alloc_coherent, .alloc = alpha_noop_alloc_coherent,
.free = alpha_noop_free_coherent, .free = dma_noop_free_coherent,
.map_page = alpha_noop_map_page, .map_page = dma_noop_map_page,
.map_sg = alpha_noop_map_sg, .map_sg = dma_noop_map_sg,
.mapping_error = alpha_noop_mapping_error, .mapping_error = dma_noop_mapping_error,
.dma_supported = alpha_noop_supported, .dma_supported = alpha_noop_supported,
}; };
......
...@@ -124,6 +124,7 @@ config S390 ...@@ -124,6 +124,7 @@ config S390
select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL select HAVE_CMPXCHG_LOCAL
select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FTRACE_MCOUNT_RECORD
...@@ -617,10 +618,6 @@ config HAS_IOMEM ...@@ -617,10 +618,6 @@ config HAS_IOMEM
config IOMMU_HELPER config IOMMU_HELPER
def_bool PCI def_bool PCI
config HAS_DMA
def_bool PCI
select HAVE_DMA_API_DEBUG
config NEED_SG_DMA_LENGTH config NEED_SG_DMA_LENGTH
def_bool PCI def_bool PCI
......
...@@ -3,5 +3,9 @@ ...@@ -3,5 +3,9 @@
* *
* This file is released under the GPLv2 * This file is released under the GPLv2
*/ */
#include <asm-generic/device.h> struct dev_archdata {
struct dma_map_ops *dma_ops;
};
struct pdev_archdata {
};
...@@ -11,11 +11,13 @@ ...@@ -11,11 +11,13 @@
#define DMA_ERROR_CODE (~(dma_addr_t) 0x0) #define DMA_ERROR_CODE (~(dma_addr_t) 0x0)
extern struct dma_map_ops s390_dma_ops; extern struct dma_map_ops s390_pci_dma_ops;
static inline struct dma_map_ops *get_dma_ops(struct device *dev) static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{ {
return &s390_dma_ops; if (dev && dev->archdata.dma_ops)
return dev->archdata.dma_ops;
return &dma_noop_ops;
} }
static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
......
...@@ -641,6 +641,7 @@ int pcibios_add_device(struct pci_dev *pdev) ...@@ -641,6 +641,7 @@ int pcibios_add_device(struct pci_dev *pdev)
int i; int i;
pdev->dev.groups = zpci_attr_groups; pdev->dev.groups = zpci_attr_groups;
pdev->dev.archdata.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev); zpci_map_resources(pdev);
for (i = 0; i < PCI_BAR_COUNT; i++) { for (i = 0; i < PCI_BAR_COUNT; i++) {
......
...@@ -547,7 +547,7 @@ static int __init dma_debug_do_init(void) ...@@ -547,7 +547,7 @@ static int __init dma_debug_do_init(void)
} }
fs_initcall(dma_debug_do_init); fs_initcall(dma_debug_do_init);
struct dma_map_ops s390_dma_ops = { struct dma_map_ops s390_pci_dma_ops = {
.alloc = s390_dma_alloc, .alloc = s390_dma_alloc,
.free = s390_dma_free, .free = s390_dma_free,
.map_sg = s390_dma_map_sg, .map_sg = s390_dma_map_sg,
...@@ -558,7 +558,7 @@ struct dma_map_ops s390_dma_ops = { ...@@ -558,7 +558,7 @@ struct dma_map_ops s390_dma_ops = {
.is_phys = 0, .is_phys = 0,
/* dma_supported is unconditionally true without a callback */ /* dma_supported is unconditionally true without a callback */
}; };
EXPORT_SYMBOL_GPL(s390_dma_ops); EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
static int __init s390_iommu_setup(char *str) static int __init s390_iommu_setup(char *str)
{ {
......
...@@ -477,8 +477,13 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev) ...@@ -477,8 +477,13 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev)
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE, err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
struct virtio_blk_config, wce, struct virtio_blk_config, wce,
&writeback); &writeback);
/*
* If WCE is not configurable and flush is not available,
* assume no writeback cache is in use.
*/
if (err) if (err)
writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE); writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);
return writeback; return writeback;
} }
...@@ -833,14 +838,14 @@ static const struct virtio_device_id id_table[] = { ...@@ -833,14 +838,14 @@ static const struct virtio_device_id id_table[] = {
static unsigned int features_legacy[] = { static unsigned int features_legacy[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_MQ,
} }
; ;
static unsigned int features[] = { static unsigned int features[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_MQ,
}; };
......
...@@ -260,7 +260,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, ...@@ -260,7 +260,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
p = page_address(page) + offset; p = page_address(page) + offset;
/* copy small packet so we can reuse these pages for small data */ /* copy small packet so we can reuse these pages for small data */
skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN); skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
if (unlikely(!skb)) if (unlikely(!skb))
return NULL; return NULL;
......
...@@ -342,13 +342,14 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev, ...@@ -342,13 +342,14 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev,
ccw->count = sizeof(*thinint_area); ccw->count = sizeof(*thinint_area);
ccw->cda = (__u32)(unsigned long) thinint_area; ccw->cda = (__u32)(unsigned long) thinint_area;
} else { } else {
/* payload is the address of the indicators */
indicatorp = kmalloc(sizeof(&vcdev->indicators), indicatorp = kmalloc(sizeof(&vcdev->indicators),
GFP_DMA | GFP_KERNEL); GFP_DMA | GFP_KERNEL);
if (!indicatorp) if (!indicatorp)
return; return;
*indicatorp = 0; *indicatorp = 0;
ccw->cmd_code = CCW_CMD_SET_IND; ccw->cmd_code = CCW_CMD_SET_IND;
ccw->count = sizeof(vcdev->indicators); ccw->count = sizeof(&vcdev->indicators);
ccw->cda = (__u32)(unsigned long) indicatorp; ccw->cda = (__u32)(unsigned long) indicatorp;
} }
/* Deregister indicators from host. */ /* Deregister indicators from host. */
...@@ -656,7 +657,10 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, ...@@ -656,7 +657,10 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
} }
} }
ret = -ENOMEM; ret = -ENOMEM;
/* We need a data area under 2G to communicate. */ /*
* We need a data area under 2G to communicate. Our payload is
* the address of the indicators.
*/
indicatorp = kmalloc(sizeof(&vcdev->indicators), GFP_DMA | GFP_KERNEL); indicatorp = kmalloc(sizeof(&vcdev->indicators), GFP_DMA | GFP_KERNEL);
if (!indicatorp) if (!indicatorp)
goto out; goto out;
...@@ -672,7 +676,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, ...@@ -672,7 +676,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
vcdev->indicators = 0; vcdev->indicators = 0;
ccw->cmd_code = CCW_CMD_SET_IND; ccw->cmd_code = CCW_CMD_SET_IND;
ccw->flags = 0; ccw->flags = 0;
ccw->count = sizeof(vcdev->indicators); ccw->count = sizeof(&vcdev->indicators);
ccw->cda = (__u32)(unsigned long) indicatorp; ccw->cda = (__u32)(unsigned long) indicatorp;
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND);
if (ret) if (ret)
...@@ -683,7 +687,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, ...@@ -683,7 +687,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
vcdev->indicators2 = 0; vcdev->indicators2 = 0;
ccw->cmd_code = CCW_CMD_SET_CONF_IND; ccw->cmd_code = CCW_CMD_SET_CONF_IND;
ccw->flags = 0; ccw->flags = 0;
ccw->count = sizeof(vcdev->indicators2); ccw->count = sizeof(&vcdev->indicators2);
ccw->cda = (__u32)(unsigned long) indicatorp; ccw->cda = (__u32)(unsigned long) indicatorp;
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_CONF_IND); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_CONF_IND);
if (ret) if (ret)
...@@ -945,8 +949,7 @@ static struct virtio_config_ops virtio_ccw_config_ops = { ...@@ -945,8 +949,7 @@ static struct virtio_config_ops virtio_ccw_config_ops = {
static void virtio_ccw_release_dev(struct device *_d) static void virtio_ccw_release_dev(struct device *_d)
{ {
struct virtio_device *dev = container_of(_d, struct virtio_device, struct virtio_device *dev = dev_to_virtio(_d);
dev);
struct virtio_ccw_device *vcdev = to_vc_device(dev); struct virtio_ccw_device *vcdev = to_vc_device(dev);
kfree(vcdev->status); kfree(vcdev->status);
......
...@@ -287,6 +287,43 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) ...@@ -287,6 +287,43 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_unlock_bh(); rcu_read_unlock_bh();
} }
static inline unsigned long busy_clock(void)
{
return local_clock() >> 10;
}
static bool vhost_can_busy_poll(struct vhost_dev *dev,
unsigned long endtime)
{
return likely(!need_resched()) &&
likely(!time_after(busy_clock(), endtime)) &&
likely(!signal_pending(current)) &&
!vhost_has_work(dev);
}
/*
 * Fetch the next TX descriptor, optionally busy-polling for one to show up.
 *
 * @net:      owning vhost-net instance (not used by this helper)
 * @vq:       virtqueue to fetch from
 * @iov:      iovec array handed to vhost_get_vq_desc()
 * @iov_size: number of entries in @iov
 * @out_num:  passed through to vhost_get_vq_desc()
 * @in_num:   passed through to vhost_get_vq_desc()
 *
 * If the first fetch returns vq->num and a busy-loop timeout is configured
 * on @vq, spin with preemption disabled until either the timeout/stop
 * conditions of vhost_can_busy_poll() hit or the avail ring is no longer
 * known to be empty, then fetch once more.
 * NOTE(review): a return of vq->num is treated here as "nothing available
 * yet" -- confirm against vhost_get_vq_desc().
 *
 * Returns the result of the last vhost_get_vq_desc() call.
 */
static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
				    struct vhost_virtqueue *vq,
				    struct iovec iov[], unsigned int iov_size,
				    unsigned int *out_num, unsigned int *in_num)
{
	/* Use the caller-supplied iovec instead of hard-wiring vq->iov. */
	int r = vhost_get_vq_desc(vq, iov, iov_size,
				  out_num, in_num, NULL, NULL);

	if (r == vq->num && vq->busyloop_timeout) {
		unsigned long endtime;

		/* Stay on this CPU so busy_clock() readings are comparable. */
		preempt_disable();
		endtime = busy_clock() + vq->busyloop_timeout;
		while (vhost_can_busy_poll(vq->dev, endtime) &&
		       vhost_vq_avail_empty(vq->dev, vq))
			cpu_relax_lowlatency();
		preempt_enable();

		r = vhost_get_vq_desc(vq, iov, iov_size,
				      out_num, in_num, NULL, NULL);
	}

	return r;
}
/* Expects to be always run from workqueue - which acts as /* Expects to be always run from workqueue - which acts as
* read-size critical section for our kind of RCU. */ * read-size critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net) static void handle_tx(struct vhost_net *net)
...@@ -331,10 +368,9 @@ static void handle_tx(struct vhost_net *net) ...@@ -331,10 +368,9 @@ static void handle_tx(struct vhost_net *net)
% UIO_MAXIOV == nvq->done_idx)) % UIO_MAXIOV == nvq->done_idx))
break; break;
head = vhost_get_vq_desc(vq, vq->iov, head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
ARRAY_SIZE(vq->iov), ARRAY_SIZE(vq->iov),
&out, &in, &out, &in);
NULL, NULL);
/* On error, stop handling until the next kick. */ /* On error, stop handling until the next kick. */
if (unlikely(head < 0)) if (unlikely(head < 0))
break; break;
...@@ -435,6 +471,38 @@ static int peek_head_len(struct sock *sk) ...@@ -435,6 +471,38 @@ static int peek_head_len(struct sock *sk)
return len; return len;
} }
/*
 * Peek the length of the next packet on @sk, busy-polling if nothing is
 * queued and the TX virtqueue has a busy-loop timeout configured.
 *
 * While polling, the TX virtqueue's mutex is held, its guest notifications
 * are disabled and preemption is off.  The loop ends when
 * vhost_can_busy_poll() says stop, the socket receive queue becomes
 * non-empty, or the TX avail ring is no longer known to be empty.
 * Afterwards notifications are re-enabled; if vhost_enable_notify()
 * returns true the TX poll work is queued.
 *
 * Returns the value of peek_head_len() (re-read after polling).
 */
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
{
	struct vhost_virtqueue *tx_vq = &net->vqs[VHOST_NET_VQ_TX].vq;
	unsigned long deadline;
	int len = peek_head_len(sk);

	/* Data already pending, or busy polling not requested. */
	if (len || !tx_vq->busyloop_timeout)
		return len;

	/* Both tx vq and rx socket were polled here */
	mutex_lock(&tx_vq->mutex);
	vhost_disable_notify(&net->dev, tx_vq);

	preempt_disable();
	deadline = busy_clock() + tx_vq->busyloop_timeout;
	while (vhost_can_busy_poll(&net->dev, deadline) &&
	       skb_queue_empty(&sk->sk_receive_queue) &&
	       vhost_vq_avail_empty(&net->dev, tx_vq))
		cpu_relax_lowlatency();
	preempt_enable();

	if (vhost_enable_notify(&net->dev, tx_vq))
		vhost_poll_queue(&tx_vq->poll);
	mutex_unlock(&tx_vq->mutex);

	return peek_head_len(sk);
}
/* This is a multi-buffer version of vhost_get_desc, that works if /* This is a multi-buffer version of vhost_get_desc, that works if
* vq has read descriptors only. * vq has read descriptors only.
* @vq - the relevant virtqueue * @vq - the relevant virtqueue
...@@ -553,7 +621,7 @@ static void handle_rx(struct vhost_net *net) ...@@ -553,7 +621,7 @@ static void handle_rx(struct vhost_net *net)
vq->log : NULL; vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
while ((sock_len = peek_head_len(sock->sk))) { while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
sock_len += sock_hlen; sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen; vhost_len = sock_len + vhost_hlen;
headcount = get_rx_bufs(vq, vq->heads, vhost_len, headcount = get_rx_bufs(vq, vq->heads, vhost_len,
...@@ -917,7 +985,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) ...@@ -917,7 +985,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
vhost_net_disable_vq(n, vq); vhost_net_disable_vq(n, vq);
vq->private_data = sock; vq->private_data = sock;
r = vhost_init_used(vq); r = vhost_vq_init_access(vq);
if (r) if (r)
goto err_used; goto err_used;
r = vhost_net_enable_vq(n, vq); r = vhost_net_enable_vq(n, vq);
......
...@@ -1274,7 +1274,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ...@@ -1274,7 +1274,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vq->private_data = vs_tpg; vq->private_data = vs_tpg;
vhost_init_used(vq); vhost_vq_init_access(vq);
mutex_unlock(&vq->mutex); mutex_unlock(&vq->mutex);
} }
ret = 0; ret = 0;
......
...@@ -196,7 +196,7 @@ static long vhost_test_run(struct vhost_test *n, int test) ...@@ -196,7 +196,7 @@ static long vhost_test_run(struct vhost_test *n, int test)
oldpriv = vq->private_data; oldpriv = vq->private_data;
vq->private_data = priv; vq->private_data = priv;
r = vhost_init_used(&n->vqs[index]); r = vhost_vq_init_access(&n->vqs[index]);
mutex_unlock(&vq->mutex); mutex_unlock(&vq->mutex);
......
...@@ -43,11 +43,21 @@ enum { ...@@ -43,11 +43,21 @@ enum {
#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])
#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{ {
vq->user_be = !virtio_legacy_is_little_endian(); vq->user_be = !virtio_legacy_is_little_endian();
} }
/*
 * Mark @vq as using a big-endian ring on behalf of userspace by setting
 * vq->user_be.  Called from vhost_set_vring_endian() when the requested
 * value is VHOST_VRING_BIG_ENDIAN.
 */
static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq)
{
vq->user_be = true;
}
/*
 * Mark @vq as using a little-endian ring on behalf of userspace by
 * clearing vq->user_be.  Called from vhost_set_vring_endian() when the
 * requested value is not VHOST_VRING_BIG_ENDIAN.
 */
static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq)
{
vq->user_be = false;
}
static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
{ {
struct vhost_vring_state s; struct vhost_vring_state s;
...@@ -62,7 +72,10 @@ static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) ...@@ -62,7 +72,10 @@ static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
s.num != VHOST_VRING_BIG_ENDIAN) s.num != VHOST_VRING_BIG_ENDIAN)
return -EINVAL; return -EINVAL;
vq->user_be = s.num; if (s.num == VHOST_VRING_BIG_ENDIAN)
vhost_enable_cross_endian_big(vq);
else
vhost_enable_cross_endian_little(vq);
return 0; return 0;
} }
...@@ -91,7 +104,7 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq) ...@@ -91,7 +104,7 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq)
vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
} }
#else #else
static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{ {
} }
...@@ -113,6 +126,11 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq) ...@@ -113,6 +126,11 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq)
} }
#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */
/*
 * Reset vq->is_le to whatever virtio_legacy_is_little_endian() reports.
 * Used by vhost_vq_reset() and by vhost_vq_init_access() when the
 * virtqueue has no private_data.
 */
static void vhost_reset_is_le(struct vhost_virtqueue *vq)
{
vq->is_le = virtio_legacy_is_little_endian();
}
static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
poll_table *pt) poll_table *pt)
{ {
...@@ -245,6 +263,13 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) ...@@ -245,6 +263,13 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
} }
EXPORT_SYMBOL_GPL(vhost_work_queue); EXPORT_SYMBOL_GPL(vhost_work_queue);
/* A lockless hint for busy polling code to exit the loop */
bool vhost_has_work(struct vhost_dev *dev)
{
return !list_empty(&dev->work_list);
}
EXPORT_SYMBOL_GPL(vhost_has_work);
void vhost_poll_queue(struct vhost_poll *poll) void vhost_poll_queue(struct vhost_poll *poll)
{ {
vhost_work_queue(poll->dev, &poll->work); vhost_work_queue(poll->dev, &poll->work);
...@@ -276,8 +301,9 @@ static void vhost_vq_reset(struct vhost_dev *dev, ...@@ -276,8 +301,9 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call = NULL; vq->call = NULL;
vq->log_ctx = NULL; vq->log_ctx = NULL;
vq->memory = NULL; vq->memory = NULL;
vq->is_le = virtio_legacy_is_little_endian(); vhost_reset_is_le(vq);
vhost_vq_reset_user_be(vq); vhost_disable_cross_endian(vq);
vq->busyloop_timeout = 0;
} }
static int vhost_worker(void *data) static int vhost_worker(void *data)
...@@ -912,6 +938,19 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) ...@@ -912,6 +938,19 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
case VHOST_GET_VRING_ENDIAN: case VHOST_GET_VRING_ENDIAN:
r = vhost_get_vring_endian(vq, idx, argp); r = vhost_get_vring_endian(vq, idx, argp);
break; break;
case VHOST_SET_VRING_BUSYLOOP_TIMEOUT:
if (copy_from_user(&s, argp, sizeof(s))) {
r = -EFAULT;
break;
}
vq->busyloop_timeout = s.num;
break;
case VHOST_GET_VRING_BUSYLOOP_TIMEOUT:
s.index = idx;
s.num = vq->busyloop_timeout;
if (copy_to_user(argp, &s, sizeof(s)))
r = -EFAULT;
break;
default: default:
r = -ENOIOCTLCMD; r = -ENOIOCTLCMD;
} }
...@@ -1152,14 +1191,14 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) ...@@ -1152,14 +1191,14 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
return 0; return 0;
} }
int vhost_init_used(struct vhost_virtqueue *vq) int vhost_vq_init_access(struct vhost_virtqueue *vq)
{ {
__virtio16 last_used_idx; __virtio16 last_used_idx;
int r; int r;
bool is_le = vq->is_le; bool is_le = vq->is_le;
if (!vq->private_data) { if (!vq->private_data) {
vq->is_le = virtio_legacy_is_little_endian(); vhost_reset_is_le(vq);
return 0; return 0;
} }
...@@ -1182,7 +1221,7 @@ int vhost_init_used(struct vhost_virtqueue *vq) ...@@ -1182,7 +1221,7 @@ int vhost_init_used(struct vhost_virtqueue *vq)
vq->is_le = is_le; vq->is_le = is_le;
return r; return r;
} }
EXPORT_SYMBOL_GPL(vhost_init_used); EXPORT_SYMBOL_GPL(vhost_vq_init_access);
static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
struct iovec iov[], int iov_size) struct iovec iov[], int iov_size)
...@@ -1633,6 +1672,20 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, ...@@ -1633,6 +1672,20 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
} }
EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
/*
 * Return true only if we are sure that the available ring is empty.
 *
 * Reads the avail index from the ring with __get_user() and compares it,
 * converted to CPU endianness, with the cached vq->avail_idx.  If the read
 * fails we cannot be sure, so false is returned.  @dev is not used.
 */
bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
	__virtio16 ring_idx;

	if (__get_user(ring_idx, &vq->avail->idx))
		return false;

	return vhost16_to_cpu(vq, ring_idx) == vq->avail_idx;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
/* OK, now we need to know about added descriptors. */ /* OK, now we need to know about added descriptors. */
bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{ {
......
...@@ -37,6 +37,7 @@ struct vhost_poll { ...@@ -37,6 +37,7 @@ struct vhost_poll {
void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
bool vhost_has_work(struct vhost_dev *dev);
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
unsigned long mask, struct vhost_dev *dev); unsigned long mask, struct vhost_dev *dev);
...@@ -114,6 +115,7 @@ struct vhost_virtqueue { ...@@ -114,6 +115,7 @@ struct vhost_virtqueue {
/* Ring endianness requested by userspace for cross-endian support. */ /* Ring endianness requested by userspace for cross-endian support. */
bool user_be; bool user_be;
#endif #endif
u32 busyloop_timeout;
}; };
struct vhost_dev { struct vhost_dev {
...@@ -148,7 +150,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, ...@@ -148,7 +150,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
struct vhost_log *log, unsigned int *log_num); struct vhost_log *log, unsigned int *log_num);
void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
int vhost_init_used(struct vhost_virtqueue *); int vhost_vq_init_access(struct vhost_virtqueue *);
int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads, int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
unsigned count); unsigned count);
...@@ -158,6 +160,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, ...@@ -158,6 +160,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
struct vring_used_elem *heads, unsigned count); struct vring_used_elem *heads, unsigned count);
void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
......
...@@ -60,7 +60,7 @@ config VIRTIO_INPUT ...@@ -60,7 +60,7 @@ config VIRTIO_INPUT
config VIRTIO_MMIO config VIRTIO_MMIO
tristate "Platform bus driver for memory mapped virtio devices" tristate "Platform bus driver for memory mapped virtio devices"
depends on HAS_IOMEM depends on HAS_IOMEM && HAS_DMA
select VIRTIO select VIRTIO
---help--- ---help---
This drivers provides support for memory mapped virtio This drivers provides support for memory mapped virtio
......
...@@ -22,8 +22,7 @@ ...@@ -22,8 +22,7 @@
#include <linux/virtio.h> #include <linux/virtio.h>
#include <linux/virtio_balloon.h> #include <linux/virtio_balloon.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/kthread.h> #include <linux/workqueue.h>
#include <linux/freezer.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -50,11 +49,13 @@ struct virtio_balloon { ...@@ -50,11 +49,13 @@ struct virtio_balloon {
struct virtio_device *vdev; struct virtio_device *vdev;
struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
/* Where the ballooning thread waits for config to change. */ /* The balloon servicing is delegated to a freezable workqueue. */
wait_queue_head_t config_change; struct work_struct update_balloon_stats_work;
struct work_struct update_balloon_size_work;
/* The thread servicing the balloon. */ /* Prevent updating balloon when it is being canceled. */
struct task_struct *thread; spinlock_t stop_update_lock;
bool stop_update;
/* Waiting for host to ack the pages we released. */ /* Waiting for host to ack the pages we released. */
wait_queue_head_t acked; wait_queue_head_t acked;
...@@ -77,7 +78,6 @@ struct virtio_balloon { ...@@ -77,7 +78,6 @@ struct virtio_balloon {
u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
/* Memory statistics */ /* Memory statistics */
int need_stats_update;
struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
/* To register callback in oom notifier call chain */ /* To register callback in oom notifier call chain */
...@@ -124,6 +124,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) ...@@ -124,6 +124,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
/* When host has read buffer, this completes via balloon_ack */ /* When host has read buffer, this completes via balloon_ack */
wait_event(vb->acked, virtqueue_get_buf(vq, &len)); wait_event(vb->acked, virtqueue_get_buf(vq, &len));
} }
static void set_page_pfns(u32 pfns[], struct page *page) static void set_page_pfns(u32 pfns[], struct page *page)
...@@ -136,9 +137,10 @@ static void set_page_pfns(u32 pfns[], struct page *page) ...@@ -136,9 +137,10 @@ static void set_page_pfns(u32 pfns[], struct page *page)
pfns[i] = page_to_balloon_pfn(page) + i; pfns[i] = page_to_balloon_pfn(page) + i;
} }
static void fill_balloon(struct virtio_balloon *vb, size_t num) static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
{ {
struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
unsigned num_allocated_pages;
/* We can only do one array worth at a time. */ /* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns)); num = min(num, ARRAY_SIZE(vb->pfns));
...@@ -163,10 +165,13 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) ...@@ -163,10 +165,13 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
adjust_managed_page_count(page, -1); adjust_managed_page_count(page, -1);
} }
num_allocated_pages = vb->num_pfns;
/* Did we get any? */ /* Did we get any? */
if (vb->num_pfns != 0) if (vb->num_pfns != 0)
tell_host(vb, vb->inflate_vq); tell_host(vb, vb->inflate_vq);
mutex_unlock(&vb->balloon_lock); mutex_unlock(&vb->balloon_lock);
return num_allocated_pages;
} }
static void release_pages_balloon(struct virtio_balloon *vb) static void release_pages_balloon(struct virtio_balloon *vb)
...@@ -257,14 +262,17 @@ static void update_balloon_stats(struct virtio_balloon *vb) ...@@ -257,14 +262,17 @@ static void update_balloon_stats(struct virtio_balloon *vb)
* with a single buffer. From that point forward, all conversations consist of * with a single buffer. From that point forward, all conversations consist of
* a hypervisor request (a call to this function) which directs us to refill * a hypervisor request (a call to this function) which directs us to refill
* the virtqueue with a fresh stats buffer. Since stats collection can sleep, * the virtqueue with a fresh stats buffer. Since stats collection can sleep,
* we notify our kthread which does the actual work via stats_handle_request(). * we delegate the job to a freezable workqueue that will do the actual work via
* stats_handle_request().
*/ */
static void stats_request(struct virtqueue *vq) static void stats_request(struct virtqueue *vq)
{ {
struct virtio_balloon *vb = vq->vdev->priv; struct virtio_balloon *vb = vq->vdev->priv;
vb->need_stats_update = 1; spin_lock(&vb->stop_update_lock);
wake_up(&vb->config_change); if (!vb->stop_update)
queue_work(system_freezable_wq, &vb->update_balloon_stats_work);
spin_unlock(&vb->stop_update_lock);
} }
static void stats_handle_request(struct virtio_balloon *vb) static void stats_handle_request(struct virtio_balloon *vb)
...@@ -273,7 +281,6 @@ static void stats_handle_request(struct virtio_balloon *vb) ...@@ -273,7 +281,6 @@ static void stats_handle_request(struct virtio_balloon *vb)
struct scatterlist sg; struct scatterlist sg;
unsigned int len; unsigned int len;
vb->need_stats_update = 0;
update_balloon_stats(vb); update_balloon_stats(vb);
vq = vb->stats_vq; vq = vb->stats_vq;
...@@ -287,8 +294,12 @@ static void stats_handle_request(struct virtio_balloon *vb) ...@@ -287,8 +294,12 @@ static void stats_handle_request(struct virtio_balloon *vb)
static void virtballoon_changed(struct virtio_device *vdev) static void virtballoon_changed(struct virtio_device *vdev)
{ {
struct virtio_balloon *vb = vdev->priv; struct virtio_balloon *vb = vdev->priv;
unsigned long flags;
wake_up(&vb->config_change); spin_lock_irqsave(&vb->stop_update_lock, flags);
if (!vb->stop_update)
queue_work(system_freezable_wq, &vb->update_balloon_size_work);
spin_unlock_irqrestore(&vb->stop_update_lock, flags);
} }
static inline s64 towards_target(struct virtio_balloon *vb) static inline s64 towards_target(struct virtio_balloon *vb)
...@@ -351,43 +362,32 @@ static int virtballoon_oom_notify(struct notifier_block *self, ...@@ -351,43 +362,32 @@ static int virtballoon_oom_notify(struct notifier_block *self,
return NOTIFY_OK; return NOTIFY_OK;
} }
static int balloon(void *_vballoon) static void update_balloon_stats_func(struct work_struct *work)
{ {
struct virtio_balloon *vb = _vballoon; struct virtio_balloon *vb;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
set_freezable();
while (!kthread_should_stop()) {
s64 diff;
try_to_freeze();
add_wait_queue(&vb->config_change, &wait);
for (;;) {
if ((diff = towards_target(vb)) != 0 ||
vb->need_stats_update ||
kthread_should_stop() ||
freezing(current))
break;
wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
remove_wait_queue(&vb->config_change, &wait);
if (vb->need_stats_update) vb = container_of(work, struct virtio_balloon,
stats_handle_request(vb); update_balloon_stats_work);
if (diff > 0) stats_handle_request(vb);
fill_balloon(vb, diff); }
else if (diff < 0)
leak_balloon(vb, -diff);
update_balloon_size(vb);
/* static void update_balloon_size_func(struct work_struct *work)
* For large balloon changes, we could spend a lot of time {
* and always have work to do. Be nice if preempt disabled. struct virtio_balloon *vb;
*/ s64 diff;
cond_resched();
} vb = container_of(work, struct virtio_balloon,
return 0; update_balloon_size_work);
diff = towards_target(vb);
if (diff > 0)
diff -= fill_balloon(vb, diff);
else if (diff < 0)
diff += leak_balloon(vb, -diff);
update_balloon_size(vb);
if (diff)
queue_work(system_freezable_wq, work);
} }
static int init_vqs(struct virtio_balloon *vb) static int init_vqs(struct virtio_balloon *vb)
...@@ -505,12 +505,14 @@ static int virtballoon_probe(struct virtio_device *vdev) ...@@ -505,12 +505,14 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out; goto out;
} }
INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
spin_lock_init(&vb->stop_update_lock);
vb->stop_update = false;
vb->num_pages = 0; vb->num_pages = 0;
mutex_init(&vb->balloon_lock); mutex_init(&vb->balloon_lock);
init_waitqueue_head(&vb->config_change);
init_waitqueue_head(&vb->acked); init_waitqueue_head(&vb->acked);
vb->vdev = vdev; vb->vdev = vdev;
vb->need_stats_update = 0;
balloon_devinfo_init(&vb->vb_dev_info); balloon_devinfo_init(&vb->vb_dev_info);
#ifdef CONFIG_BALLOON_COMPACTION #ifdef CONFIG_BALLOON_COMPACTION
...@@ -529,16 +531,8 @@ static int virtballoon_probe(struct virtio_device *vdev) ...@@ -529,16 +531,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
virtio_device_ready(vdev); virtio_device_ready(vdev);
vb->thread = kthread_run(balloon, vb, "vballoon");
if (IS_ERR(vb->thread)) {
err = PTR_ERR(vb->thread);
goto out_del_vqs;
}
return 0; return 0;
out_del_vqs:
unregister_oom_notifier(&vb->nb);
out_oom_notify: out_oom_notify:
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
out_free_vb: out_free_vb:
...@@ -565,7 +559,13 @@ static void virtballoon_remove(struct virtio_device *vdev) ...@@ -565,7 +559,13 @@ static void virtballoon_remove(struct virtio_device *vdev)
struct virtio_balloon *vb = vdev->priv; struct virtio_balloon *vb = vdev->priv;
unregister_oom_notifier(&vb->nb); unregister_oom_notifier(&vb->nb);
kthread_stop(vb->thread);
spin_lock_irq(&vb->stop_update_lock);
vb->stop_update = true;
spin_unlock_irq(&vb->stop_update_lock);
cancel_work_sync(&vb->update_balloon_size_work);
cancel_work_sync(&vb->update_balloon_stats_work);
remove_common(vb); remove_common(vb);
kfree(vb); kfree(vb);
} }
...@@ -576,10 +576,9 @@ static int virtballoon_freeze(struct virtio_device *vdev) ...@@ -576,10 +576,9 @@ static int virtballoon_freeze(struct virtio_device *vdev)
struct virtio_balloon *vb = vdev->priv; struct virtio_balloon *vb = vdev->priv;
/* /*
* The kthread is already frozen by the PM core before this * The workqueue is already frozen by the PM core before this
* function is called. * function is called.
*/ */
remove_common(vb); remove_common(vb);
return 0; return 0;
} }
...@@ -595,7 +594,8 @@ static int virtballoon_restore(struct virtio_device *vdev) ...@@ -595,7 +594,8 @@ static int virtballoon_restore(struct virtio_device *vdev)
virtio_device_ready(vdev); virtio_device_ready(vdev);
fill_balloon(vb, towards_target(vb)); if (towards_target(vb))
virtballoon_changed(vdev);
update_balloon_size(vb); update_balloon_size(vb);
return 0; return 0;
} }
......
...@@ -99,12 +99,6 @@ struct virtio_mmio_vq_info { ...@@ -99,12 +99,6 @@ struct virtio_mmio_vq_info {
/* the actual virtqueue */ /* the actual virtqueue */
struct virtqueue *vq; struct virtqueue *vq;
/* the number of entries in the queue */
unsigned int num;
/* the virtual address of the ring queue */
void *queue;
/* the list node for the virtqueues list */ /* the list node for the virtqueues list */
struct list_head node; struct list_head node;
}; };
...@@ -322,15 +316,13 @@ static void vm_del_vq(struct virtqueue *vq) ...@@ -322,15 +316,13 @@ static void vm_del_vq(struct virtqueue *vq)
{ {
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
struct virtio_mmio_vq_info *info = vq->priv; struct virtio_mmio_vq_info *info = vq->priv;
unsigned long flags, size; unsigned long flags;
unsigned int index = vq->index; unsigned int index = vq->index;
spin_lock_irqsave(&vm_dev->lock, flags); spin_lock_irqsave(&vm_dev->lock, flags);
list_del(&info->node); list_del(&info->node);
spin_unlock_irqrestore(&vm_dev->lock, flags); spin_unlock_irqrestore(&vm_dev->lock, flags);
vring_del_virtqueue(vq);
/* Select and deactivate the queue */ /* Select and deactivate the queue */
writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
if (vm_dev->version == 1) { if (vm_dev->version == 1) {
...@@ -340,8 +332,8 @@ static void vm_del_vq(struct virtqueue *vq) ...@@ -340,8 +332,8 @@ static void vm_del_vq(struct virtqueue *vq)
WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY)); WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY));
} }
size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN)); vring_del_virtqueue(vq);
free_pages_exact(info->queue, size);
kfree(info); kfree(info);
} }
...@@ -356,8 +348,6 @@ static void vm_del_vqs(struct virtio_device *vdev) ...@@ -356,8 +348,6 @@ static void vm_del_vqs(struct virtio_device *vdev)
free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev); free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
} }
static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name) const char *name)
...@@ -365,7 +355,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, ...@@ -365,7 +355,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtio_mmio_vq_info *info; struct virtio_mmio_vq_info *info;
struct virtqueue *vq; struct virtqueue *vq;
unsigned long flags, size; unsigned long flags;
unsigned int num;
int err; int err;
if (!name) if (!name)
...@@ -388,66 +379,40 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, ...@@ -388,66 +379,40 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
goto error_kmalloc; goto error_kmalloc;
} }
/* Allocate pages for the queue - start with a queue as big as num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
* possible (limited by maximum size allowed by device), drop down if (num == 0) {
* to a minimal size, just big enough to fit descriptor table
* and two rings (which makes it "alignment_size * 2")
*/
info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
/* If the device reports a 0 entry queue, we won't be able to
* use it to perform I/O, and vring_new_virtqueue() can't create
* empty queues anyway, so don't bother to set up the device.
*/
if (info->num == 0) {
err = -ENOENT; err = -ENOENT;
goto error_alloc_pages; goto error_new_virtqueue;
}
while (1) {
size = PAGE_ALIGN(vring_size(info->num,
VIRTIO_MMIO_VRING_ALIGN));
/* Did the last iter shrink the queue below minimum size? */
if (size < VIRTIO_MMIO_VRING_ALIGN * 2) {
err = -ENOMEM;
goto error_alloc_pages;
}
info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
if (info->queue)
break;
info->num /= 2;
} }
/* Create the vring */ /* Create the vring */
vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev, vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
true, info->queue, vm_notify, callback, name); true, true, vm_notify, callback, name);
if (!vq) { if (!vq) {
err = -ENOMEM; err = -ENOMEM;
goto error_new_virtqueue; goto error_new_virtqueue;
} }
/* Activate the queue */ /* Activate the queue */
writel(info->num, vm_dev->base + VIRTIO_MMIO_QUEUE_NUM); writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
if (vm_dev->version == 1) { if (vm_dev->version == 1) {
writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN); writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN);
writel(virt_to_phys(info->queue) >> PAGE_SHIFT, writel(virtqueue_get_desc_addr(vq) >> PAGE_SHIFT,
vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
} else { } else {
u64 addr; u64 addr;
addr = virt_to_phys(info->queue); addr = virtqueue_get_desc_addr(vq);
writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_LOW); writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_LOW);
writel((u32)(addr >> 32), writel((u32)(addr >> 32),
vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_HIGH); vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_HIGH);
addr = virt_to_phys(virtqueue_get_avail(vq)); addr = virtqueue_get_avail_addr(vq);
writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_LOW); writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_LOW);
writel((u32)(addr >> 32), writel((u32)(addr >> 32),
vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_HIGH); vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_HIGH);
addr = virt_to_phys(virtqueue_get_used(vq)); addr = virtqueue_get_used_addr(vq);
writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_USED_LOW); writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_USED_LOW);
writel((u32)(addr >> 32), writel((u32)(addr >> 32),
vm_dev->base + VIRTIO_MMIO_QUEUE_USED_HIGH); vm_dev->base + VIRTIO_MMIO_QUEUE_USED_HIGH);
...@@ -471,8 +436,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, ...@@ -471,8 +436,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_READY); writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY)); WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY));
} }
free_pages_exact(info->queue, size);
error_alloc_pages:
kfree(info); kfree(info);
error_kmalloc: error_kmalloc:
error_available: error_available:
......
...@@ -35,12 +35,6 @@ struct virtio_pci_vq_info { ...@@ -35,12 +35,6 @@ struct virtio_pci_vq_info {
/* the actual virtqueue */ /* the actual virtqueue */
struct virtqueue *vq; struct virtqueue *vq;
/* the number of entries in the queue */
int num;
/* the virtual address of the ring queue */
void *queue;
/* the list node for the virtqueues list */ /* the list node for the virtqueues list */
struct list_head node; struct list_head node;
......
...@@ -119,7 +119,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -119,7 +119,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
u16 msix_vec) u16 msix_vec)
{ {
struct virtqueue *vq; struct virtqueue *vq;
unsigned long size;
u16 num; u16 num;
int err; int err;
...@@ -131,27 +130,19 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -131,27 +130,19 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
info->num = num;
info->msix_vector = msix_vec; info->msix_vector = msix_vec;
size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); /* create the vring */
info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); vq = vring_create_virtqueue(index, num,
if (info->queue == NULL) VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
true, false, vp_notify, callback, name);
if (!vq)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
/* activate the queue */ /* activate the queue */
iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, iowrite32(virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
/* create the vring */
vq = vring_new_virtqueue(index, info->num,
VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
true, info->queue, vp_notify, callback, name);
if (!vq) {
err = -ENOMEM;
goto out_activate_queue;
}
vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
if (msix_vec != VIRTIO_MSI_NO_VECTOR) { if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
...@@ -159,17 +150,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -159,17 +150,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) { if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY; err = -EBUSY;
goto out_assign; goto out_deactivate;
} }
} }
return vq; return vq;
out_assign: out_deactivate:
vring_del_virtqueue(vq);
out_activate_queue:
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
free_pages_exact(info->queue, size); vring_del_virtqueue(vq);
return ERR_PTR(err); return ERR_PTR(err);
} }
...@@ -177,7 +166,6 @@ static void del_vq(struct virtio_pci_vq_info *info) ...@@ -177,7 +166,6 @@ static void del_vq(struct virtio_pci_vq_info *info)
{ {
struct virtqueue *vq = info->vq; struct virtqueue *vq = info->vq;
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
unsigned long size;
iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
...@@ -188,13 +176,10 @@ static void del_vq(struct virtio_pci_vq_info *info) ...@@ -188,13 +176,10 @@ static void del_vq(struct virtio_pci_vq_info *info)
ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
} }
vring_del_virtqueue(vq);
/* Select and deactivate the queue */ /* Select and deactivate the queue */
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); vring_del_virtqueue(vq);
free_pages_exact(info->queue, size);
} }
static const struct virtio_config_ops virtio_pci_config_ops = { static const struct virtio_config_ops virtio_pci_config_ops = {
...@@ -227,6 +212,13 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) ...@@ -227,6 +212,13 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
return -ENODEV; return -ENODEV;
} }
rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
if (rc)
rc = dma_set_mask_and_coherent(&pci_dev->dev,
DMA_BIT_MASK(32));
if (rc)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
if (rc) if (rc)
return rc; return rc;
......
...@@ -287,31 +287,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) ...@@ -287,31 +287,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
return vp_ioread16(&vp_dev->common->msix_config); return vp_ioread16(&vp_dev->common->msix_config);
} }
/*
 * Number of bytes to allocate for a ring of @num entries: the
 * vring_size() of the ring with SMP_CACHE_BYTES alignment, rounded up
 * with PAGE_ALIGN().
 */
static size_t vring_pci_size(u16 num)
{
/* We only need a cacheline separation. */
return PAGE_ALIGN(vring_size(num, SMP_CACHE_BYTES));
}
/*
 * Allocate zeroed memory for a ring, shrinking the ring if necessary.
 *
 * @num: in/out; requested number of entries on entry, the number of
 *       entries the returned memory was sized for on return.
 *
 * While the ring needs more than one page, each failed attempt halves
 * *num and retries (those attempts pass __GFP_NOWARN).  Once the ring
 * fits in a single page one last allocation is attempted without
 * __GFP_NOWARN.
 *
 * Returns the allocation, or NULL if *num is zero or the final
 * allocation fails.
 */
static void *alloc_virtqueue_pages(int *num)
{
	/* TODO: allocate each queue chunk individually */
	while (*num && vring_pci_size(*num) > PAGE_SIZE) {
		void *ring = alloc_pages_exact(vring_pci_size(*num),
					       GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);

		if (ring)
			return ring;

		/* Did not fit: retry with half as many entries. */
		*num /= 2;
	}

	if (*num == 0)
		return NULL;

	/* Last resort: the ring now fits in a single page. */
	return alloc_pages_exact(vring_pci_size(*num), GFP_KERNEL | __GFP_ZERO);
}
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
struct virtio_pci_vq_info *info, struct virtio_pci_vq_info *info,
unsigned index, unsigned index,
...@@ -343,29 +318,22 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -343,29 +318,22 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
/* get offset of notification word for this vq */ /* get offset of notification word for this vq */
off = vp_ioread16(&cfg->queue_notify_off); off = vp_ioread16(&cfg->queue_notify_off);
info->num = num;
info->msix_vector = msix_vec; info->msix_vector = msix_vec;
info->queue = alloc_virtqueue_pages(&info->num);
if (info->queue == NULL)
return ERR_PTR(-ENOMEM);
/* create the vring */ /* create the vring */
vq = vring_new_virtqueue(index, info->num, vq = vring_create_virtqueue(index, num,
SMP_CACHE_BYTES, &vp_dev->vdev, SMP_CACHE_BYTES, &vp_dev->vdev,
true, info->queue, vp_notify, callback, name); true, true, vp_notify, callback, name);
if (!vq) { if (!vq)
err = -ENOMEM; return ERR_PTR(-ENOMEM);
goto err_new_queue;
}
/* activate the queue */ /* activate the queue */
vp_iowrite16(num, &cfg->queue_size); vp_iowrite16(virtqueue_get_vring_size(vq), &cfg->queue_size);
vp_iowrite64_twopart(virt_to_phys(info->queue), vp_iowrite64_twopart(virtqueue_get_desc_addr(vq),
&cfg->queue_desc_lo, &cfg->queue_desc_hi); &cfg->queue_desc_lo, &cfg->queue_desc_hi);
vp_iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)), vp_iowrite64_twopart(virtqueue_get_avail_addr(vq),
&cfg->queue_avail_lo, &cfg->queue_avail_hi); &cfg->queue_avail_lo, &cfg->queue_avail_hi);
vp_iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)), vp_iowrite64_twopart(virtqueue_get_used_addr(vq),
&cfg->queue_used_lo, &cfg->queue_used_hi); &cfg->queue_used_lo, &cfg->queue_used_hi);
if (vp_dev->notify_base) { if (vp_dev->notify_base) {
...@@ -410,8 +378,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -410,8 +378,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv); pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
err_map_notify: err_map_notify:
vring_del_virtqueue(vq); vring_del_virtqueue(vq);
err_new_queue:
free_pages_exact(info->queue, vring_pci_size(info->num));
return ERR_PTR(err); return ERR_PTR(err);
} }
...@@ -456,8 +422,6 @@ static void del_vq(struct virtio_pci_vq_info *info) ...@@ -456,8 +422,6 @@ static void del_vq(struct virtio_pci_vq_info *info)
pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv); pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);
vring_del_virtqueue(vq); vring_del_virtqueue(vq);
free_pages_exact(info->queue, vring_pci_size(info->num));
} }
static const struct virtio_config_ops virtio_pci_config_nodev_ops = { static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
...@@ -641,6 +605,13 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) ...@@ -641,6 +605,13 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
return -EINVAL; return -EINVAL;
} }
err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
if (err)
err = dma_set_mask_and_coherent(&pci_dev->dev,
DMA_BIT_MASK(32));
if (err)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
/* Device capability is only mandatory for devices that have /* Device capability is only mandatory for devices that have
* device-specific configuration. * device-specific configuration.
*/ */
......
...@@ -24,6 +24,8 @@ ...@@ -24,6 +24,8 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/hrtimer.h> #include <linux/hrtimer.h>
#include <linux/kmemleak.h> #include <linux/kmemleak.h>
#include <linux/dma-mapping.h>
#include <xen/xen.h>
#ifdef DEBUG #ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */ /* For development, we want to crash whenever the ring is screwed. */
...@@ -54,6 +56,11 @@ ...@@ -54,6 +56,11 @@
#define END_USE(vq) #define END_USE(vq)
#endif #endif
/*
 * State tracked per descriptor; struct vring_virtqueue carries an array of
 * these (desc_state[]) which virtqueue_add() and detach_buf() index by the
 * head descriptor of a buffer.
 */
struct vring_desc_state {
void *data; /* Token for the callback: stored by virtqueue_add(), reset to NULL by detach_buf(). */
struct vring_desc *indir_desc; /* Indirect descriptor table, if any; detach_buf() unmaps its entries, kfree()s it and resets this to NULL. */
};
struct vring_virtqueue { struct vring_virtqueue {
struct virtqueue vq; struct virtqueue vq;
...@@ -89,6 +96,11 @@ struct vring_virtqueue { ...@@ -89,6 +96,11 @@ struct vring_virtqueue {
/* How to notify other side. FIXME: commonalize hcalls! */ /* How to notify other side. FIXME: commonalize hcalls! */
bool (*notify)(struct virtqueue *vq); bool (*notify)(struct virtqueue *vq);
/* DMA, allocation, and size information */
bool we_own_ring;
size_t queue_size_in_bytes;
dma_addr_t queue_dma_addr;
#ifdef DEBUG #ifdef DEBUG
/* They're supposed to lock for us. */ /* They're supposed to lock for us. */
unsigned int in_use; unsigned int in_use;
...@@ -98,12 +110,120 @@ struct vring_virtqueue { ...@@ -98,12 +110,120 @@ struct vring_virtqueue {
ktime_t last_add_time; ktime_t last_add_time;
#endif #endif
/* Tokens for callbacks. */ /* Per-descriptor state. */
void *data[]; struct vring_desc_state desc_state[];
}; };
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
/*
* The interaction between virtio and a possible IOMMU is a mess.
*
* On most systems with virtio, physical addresses match bus addresses,
* and it doesn't particularly matter whether we use the DMA API.
*
* On some systems, including Xen and any system with a physical device
* that speaks virtio behind a physical IOMMU, we must use the DMA API
* for virtio DMA to work at all.
*
* On other systems, including SPARC and PPC64, virtio-pci devices are
* enumerated as though they are behind an IOMMU, but the virtio host
* ignores the IOMMU, so we must either pretend that the IOMMU isn't
* there or somehow map everything as the identity.
*
* For the time being, we preserve historic behavior and bypass the DMA
* API.
*/
/*
 * Tell whether ring and buffer addresses for @vdev have to be obtained
 * through the DMA API.
 *
 * Only Xen guests get the DMA API; every other configuration keeps the
 * historic behaviour of bypassing it.  A buggy QEMU-provided emulated
 * Q35 IOMMU combined with Xen is theoretically possible, but virtio has
 * never worked on such a setup and would need an even larger kludge, so
 * that case is deliberately not special-cased: keying on Xen alone at
 * least makes all of the sensible Xen configurations work.
 */
static bool vring_use_dma_api(struct virtio_device *vdev)
{
	return xen_domain() ? true : false;
}
/*
* The DMA ops on various arches are rather gnarly right now, and
* making all of the arch DMA ops work on the vring device itself
* is a mess. For now, we use the parent device for DMA ops.
*/
struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
return vq->vq.vdev->dev.parent;
}
/*
 * Map a single scatterlist entry and return the address to place in a
 * descriptor.
 *
 * When the DMA API is in use the entry is mapped with dma_map_page() in
 * the given @direction; otherwise the entry's physical address is
 * returned directly.
 */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (vring_use_dma_api(vq->vq.vdev)) {
		/*
		 * dma_map_sg() is not an option: it expects the scatterlist
		 * to exist for the lifetime of the mapping, which we do not
		 * guarantee.
		 */
		return dma_map_page(vring_dma_dev(vq),
				    sg_page(sg), sg->offset, sg->length,
				    direction);
	}

	return (dma_addr_t)sg_phys(sg);
}
/*
 * Map @size bytes starting at the kernel virtual address @cpu_addr and
 * return the address to place in a descriptor.
 *
 * Uses dma_map_single() in the given @direction when the DMA API is in
 * use, and the plain physical address of @cpu_addr otherwise.
 */
static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (vring_use_dma_api(vq->vq.vdev))
		return dma_map_single(vring_dma_dev(vq),
				      cpu_addr, size, direction);

	return (dma_addr_t)virt_to_phys(cpu_addr);
}
static void vring_unmap_one(const struct vring_virtqueue *vq,
struct vring_desc *desc)
{
u16 flags;
if (!vring_use_dma_api(vq->vq.vdev))
return;
flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
if (flags & VRING_DESC_F_INDIRECT) {
dma_unmap_single(vring_dma_dev(vq),
virtio64_to_cpu(vq->vq.vdev, desc->addr),
virtio32_to_cpu(vq->vq.vdev, desc->len),
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
dma_unmap_page(vring_dma_dev(vq),
virtio64_to_cpu(vq->vq.vdev, desc->addr),
virtio32_to_cpu(vq->vq.vdev, desc->len),
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
}
}
/*
 * Check an address returned by one of the vring_map_*() helpers.
 *
 * Returns the result of dma_mapping_error() when the DMA API is in use,
 * and 0 when it is bypassed.
 */
static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	if (vring_use_dma_api(vq->vq.vdev))
		return dma_mapping_error(vring_dma_dev(vq), addr);

	return 0;
}
static struct vring_desc *alloc_indirect(struct virtqueue *_vq, static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
unsigned int total_sg, gfp_t gfp) unsigned int total_sg, gfp_t gfp)
{ {
...@@ -137,7 +257,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, ...@@ -137,7 +257,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
struct vring_virtqueue *vq = to_vvq(_vq); struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg; struct scatterlist *sg;
struct vring_desc *desc; struct vring_desc *desc;
unsigned int i, n, avail, descs_used, uninitialized_var(prev); unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
int head; int head;
bool indirect; bool indirect;
...@@ -177,21 +297,15 @@ static inline int virtqueue_add(struct virtqueue *_vq, ...@@ -177,21 +297,15 @@ static inline int virtqueue_add(struct virtqueue *_vq,
if (desc) { if (desc) {
/* Use a single buffer which doesn't continue */ /* Use a single buffer which doesn't continue */
vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT); indirect = true;
vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc));
/* avoid kmemleak false positive (hidden by virt_to_phys) */
kmemleak_ignore(desc);
vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
/* Set up rest to use this indirect table. */ /* Set up rest to use this indirect table. */
i = 0; i = 0;
descs_used = 1; descs_used = 1;
indirect = true;
} else { } else {
indirect = false;
desc = vq->vring.desc; desc = vq->vring.desc;
i = head; i = head;
descs_used = total_sg; descs_used = total_sg;
indirect = false;
} }
if (vq->vq.num_free < descs_used) { if (vq->vq.num_free < descs_used) {
...@@ -206,13 +320,14 @@ static inline int virtqueue_add(struct virtqueue *_vq, ...@@ -206,13 +320,14 @@ static inline int virtqueue_add(struct virtqueue *_vq,
return -ENOSPC; return -ENOSPC;
} }
/* We're about to use some buffers from the free list. */
vq->vq.num_free -= descs_used;
for (n = 0; n < out_sgs; n++) { for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) { for (sg = sgs[n]; sg; sg = sg_next(sg)) {
dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
if (vring_mapping_error(vq, addr))
goto unmap_release;
desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg)); desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
prev = i; prev = i;
i = virtio16_to_cpu(_vq->vdev, desc[i].next); i = virtio16_to_cpu(_vq->vdev, desc[i].next);
...@@ -220,8 +335,12 @@ static inline int virtqueue_add(struct virtqueue *_vq, ...@@ -220,8 +335,12 @@ static inline int virtqueue_add(struct virtqueue *_vq,
} }
for (; n < (out_sgs + in_sgs); n++) { for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) { for (sg = sgs[n]; sg; sg = sg_next(sg)) {
dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
if (vring_mapping_error(vq, addr))
goto unmap_release;
desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg)); desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
prev = i; prev = i;
i = virtio16_to_cpu(_vq->vdev, desc[i].next); i = virtio16_to_cpu(_vq->vdev, desc[i].next);
...@@ -230,14 +349,33 @@ static inline int virtqueue_add(struct virtqueue *_vq, ...@@ -230,14 +349,33 @@ static inline int virtqueue_add(struct virtqueue *_vq,
/* Last one doesn't continue. */ /* Last one doesn't continue. */
desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
if (indirect) {
/* Now that the indirect table is filled in, map it. */
dma_addr_t addr = vring_map_single(
vq, desc, total_sg * sizeof(struct vring_desc),
DMA_TO_DEVICE);
if (vring_mapping_error(vq, addr))
goto unmap_release;
vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
}
/* We're using some buffers from the free list. */
vq->vq.num_free -= descs_used;
/* Update free pointer */ /* Update free pointer */
if (indirect) if (indirect)
vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next); vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
else else
vq->free_head = i; vq->free_head = i;
/* Set token. */ /* Store token and indirect buffer state. */
vq->data[head] = data; vq->desc_state[head].data = data;
if (indirect)
vq->desc_state[head].indir_desc = desc;
/* Put entry in available array (but don't update avail->idx until they /* Put entry in available array (but don't update avail->idx until they
* do sync). */ * do sync). */
...@@ -260,6 +398,24 @@ static inline int virtqueue_add(struct virtqueue *_vq, ...@@ -260,6 +398,24 @@ static inline int virtqueue_add(struct virtqueue *_vq,
virtqueue_kick(_vq); virtqueue_kick(_vq);
return 0; return 0;
unmap_release:
err_idx = i;
i = head;
for (n = 0; n < total_sg; n++) {
if (i == err_idx)
break;
vring_unmap_one(vq, &desc[i]);
i = vq->vring.desc[i].next;
}
vq->vq.num_free += total_sg;
if (indirect)
kfree(desc);
return -EIO;
} }
/** /**
...@@ -430,27 +586,43 @@ EXPORT_SYMBOL_GPL(virtqueue_kick); ...@@ -430,27 +586,43 @@ EXPORT_SYMBOL_GPL(virtqueue_kick);
static void detach_buf(struct vring_virtqueue *vq, unsigned int head) static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
{ {
unsigned int i; unsigned int i, j;
u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
/* Clear data ptr. */ /* Clear data ptr. */
vq->data[head] = NULL; vq->desc_state[head].data = NULL;
/* Put back on free list: find end */ /* Put back on free list: unmap first-level descriptors and find end */
i = head; i = head;
/* Free the indirect table */ while (vq->vring.desc[i].flags & nextflag) {
if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)) vring_unmap_one(vq, &vq->vring.desc[i]);
kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr)));
while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) {
i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next); i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
vq->vq.num_free++; vq->vq.num_free++;
} }
vring_unmap_one(vq, &vq->vring.desc[i]);
vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
vq->free_head = head; vq->free_head = head;
/* Plus final descriptor */ /* Plus final descriptor */
vq->vq.num_free++; vq->vq.num_free++;
/* Free the indirect table, if any, now that it's unmapped. */
if (vq->desc_state[head].indir_desc) {
struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
BUG_ON(!(vq->vring.desc[head].flags &
cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
BUG_ON(len == 0 || len % sizeof(struct vring_desc));
for (j = 0; j < len / sizeof(struct vring_desc); j++)
vring_unmap_one(vq, &indir_desc[j]);
kfree(vq->desc_state[head].indir_desc);
vq->desc_state[head].indir_desc = NULL;
}
} }
static inline bool more_used(const struct vring_virtqueue *vq) static inline bool more_used(const struct vring_virtqueue *vq)
...@@ -505,13 +677,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) ...@@ -505,13 +677,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
BAD_RING(vq, "id %u out of range\n", i); BAD_RING(vq, "id %u out of range\n", i);
return NULL; return NULL;
} }
if (unlikely(!vq->data[i])) { if (unlikely(!vq->desc_state[i].data)) {
BAD_RING(vq, "id %u is not a head!\n", i); BAD_RING(vq, "id %u is not a head!\n", i);
return NULL; return NULL;
} }
/* detach_buf clears data, so grab it now. */ /* detach_buf clears data, so grab it now. */
ret = vq->data[i]; ret = vq->desc_state[i].data;
detach_buf(vq, i); detach_buf(vq, i);
vq->last_used_idx++; vq->last_used_idx++;
/* If we expect an interrupt for the next entry, tell host /* If we expect an interrupt for the next entry, tell host
...@@ -685,10 +857,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) ...@@ -685,10 +857,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
START_USE(vq); START_USE(vq);
for (i = 0; i < vq->vring.num; i++) { for (i = 0; i < vq->vring.num; i++) {
if (!vq->data[i]) if (!vq->desc_state[i].data)
continue; continue;
/* detach_buf clears data, so grab it now. */ /* detach_buf clears data, so grab it now. */
buf = vq->data[i]; buf = vq->desc_state[i].data;
detach_buf(vq, i); detach_buf(vq, i);
vq->avail_idx_shadow--; vq->avail_idx_shadow--;
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
...@@ -723,35 +895,31 @@ irqreturn_t vring_interrupt(int irq, void *_vq) ...@@ -723,35 +895,31 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
} }
EXPORT_SYMBOL_GPL(vring_interrupt); EXPORT_SYMBOL_GPL(vring_interrupt);
struct virtqueue *vring_new_virtqueue(unsigned int index, struct virtqueue *__vring_new_virtqueue(unsigned int index,
unsigned int num, struct vring vring,
unsigned int vring_align, struct virtio_device *vdev,
struct virtio_device *vdev, bool weak_barriers,
bool weak_barriers, bool (*notify)(struct virtqueue *),
void *pages, void (*callback)(struct virtqueue *),
bool (*notify)(struct virtqueue *), const char *name)
void (*callback)(struct virtqueue *),
const char *name)
{ {
struct vring_virtqueue *vq;
unsigned int i; unsigned int i;
struct vring_virtqueue *vq;
/* We assume num is a power of 2. */ vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
if (num & (num - 1)) { GFP_KERNEL);
dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
return NULL;
}
vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
if (!vq) if (!vq)
return NULL; return NULL;
vring_init(&vq->vring, num, pages, vring_align); vq->vring = vring;
vq->vq.callback = callback; vq->vq.callback = callback;
vq->vq.vdev = vdev; vq->vq.vdev = vdev;
vq->vq.name = name; vq->vq.name = name;
vq->vq.num_free = num; vq->vq.num_free = vring.num;
vq->vq.index = index; vq->vq.index = index;
vq->we_own_ring = false;
vq->queue_dma_addr = 0;
vq->queue_size_in_bytes = 0;
vq->notify = notify; vq->notify = notify;
vq->weak_barriers = weak_barriers; vq->weak_barriers = weak_barriers;
vq->broken = false; vq->broken = false;
...@@ -776,20 +944,145 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, ...@@ -776,20 +944,145 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
/* Put everything in free lists. */ /* Put everything in free lists. */
vq->free_head = 0; vq->free_head = 0;
for (i = 0; i < num-1; i++) { for (i = 0; i < vring.num-1; i++)
vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
vq->data[i] = NULL; memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
}
vq->data[i] = NULL;
return &vq->vq; return &vq->vq;
} }
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
/*
 * Allocate @size bytes of ring memory for @vdev.
 *
 * When vring_use_dma_api() says so, the memory comes from
 * dma_alloc_coherent() on the parent device.  Otherwise whole pages are
 * taken from alloc_pages_exact() and the physical address doubles as the
 * DMA handle.
 *
 * On success the CPU pointer is returned and *@dma_handle is filled in.
 * Returns NULL if the allocation fails or the physical address cannot be
 * represented in a dma_addr_t.
 */
static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag)
{
	phys_addr_t phys_addr;
	void *queue;

	if (vring_use_dma_api(vdev))
		return dma_alloc_coherent(vdev->dev.parent, size,
					  dma_handle, flag);

	queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
	if (!queue)
		return NULL;

	phys_addr = virt_to_phys(queue);
	*dma_handle = (dma_addr_t)phys_addr;

	/*
	 * Sanity check: make sure we didn't truncate the address.  The
	 * only arches I can find that have 64-bit phys_addr_t but 32-bit
	 * dma_addr_t are certain non-highmem MIPS and x86 configurations,
	 * but these configurations should never allocate physical pages
	 * above 32 bits, so this is fine.  Just in case, throw a warning
	 * and abort if we end up with an unrepresentable address.
	 */
	if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
		free_pages_exact(queue, PAGE_ALIGN(size));
		return NULL;
	}

	return queue;
}
/*
 * Release ring memory for @vdev, mirroring the two allocation paths of
 * vring_alloc_queue(): dma_free_coherent() when the DMA API is in use,
 * free_pages_exact() on the page-aligned size otherwise.
 */
static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle)
{
	if (!vring_use_dma_api(vdev)) {
		free_pages_exact(queue, PAGE_ALIGN(size));
		return;
	}

	dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
}
/*
 * Allocate ring memory and build a virtqueue on top of it.
 *
 * @num must be a power of two.  While the ring is larger than one page,
 * allocation is attempted without failure warnings; if an attempt fails
 * and @may_reduce_num is true the entry count is halved and the attempt
 * repeated.  If @may_reduce_num is false the first failure is final,
 * because the caller cannot cope with a ring smaller than requested.
 * Once the ring fits in a single page, one last allocation is tried.
 *
 * On success the queue is marked as owning its ring (we_own_ring), with
 * its DMA address and byte size recorded, so vring_del_virtqueue() can
 * free it.  Returns the new virtqueue, or NULL on any failure.
 */
struct virtqueue *vring_create_virtqueue(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	struct virtqueue *vq;
	/*
	 * Must start out NULL: if the requested ring already fits in one
	 * page the loop below never runs and the !queue test would
	 * otherwise read an uninitialized pointer.
	 */
	void *queue = NULL;
	dma_addr_t dma_addr;
	size_t queue_size_in_bytes;
	struct vring vring;

	/* We assume num is a power of 2. */
	if (num & (num - 1)) {
		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
		return NULL;
	}

	/* TODO: allocate each queue chunk individually */
	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr,
					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
		if (queue)
			break;
		/* The caller asked for exactly this size: do not shrink. */
		if (!may_reduce_num)
			return NULL;
	}

	if (!num)
		return NULL;

	if (!queue) {
		/* Try to get a single page. You are my only hope! */
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
	}
	if (!queue)
		return NULL;

	queue_size_in_bytes = vring_size(num, vring_align);
	vring_init(&vring, num, queue, vring_align);

	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers,
				   notify, callback, name);
	if (!vq) {
		/* Nobody owns the ring yet, so release it here. */
		vring_free_queue(vdev, queue_size_in_bytes, queue,
				 dma_addr);
		return NULL;
	}

	to_vvq(vq)->queue_dma_addr = dma_addr;
	to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes;
	to_vvq(vq)->we_own_ring = true;

	return vq;
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);
struct virtqueue *vring_new_virtqueue(unsigned int index,
unsigned int num,
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
void *pages,
bool (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq),
const char *name)
{
struct vring vring;
vring_init(&vring, num, pages, vring_align);
return __vring_new_virtqueue(index, vring, vdev, weak_barriers,
notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue); EXPORT_SYMBOL_GPL(vring_new_virtqueue);
void vring_del_virtqueue(struct virtqueue *vq) void vring_del_virtqueue(struct virtqueue *_vq)
{ {
list_del(&vq->list); struct vring_virtqueue *vq = to_vvq(_vq);
kfree(to_vvq(vq));
if (vq->we_own_ring) {
vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
vq->vring.desc, vq->queue_dma_addr);
}
list_del(&_vq->list);
kfree(vq);
} }
EXPORT_SYMBOL_GPL(vring_del_virtqueue); EXPORT_SYMBOL_GPL(vring_del_virtqueue);
...@@ -853,20 +1146,42 @@ void virtio_break_device(struct virtio_device *dev) ...@@ -853,20 +1146,42 @@ void virtio_break_device(struct virtio_device *dev)
} }
EXPORT_SYMBOL_GPL(virtio_break_device); EXPORT_SYMBOL_GPL(virtio_break_device);
void *virtqueue_get_avail(struct virtqueue *_vq) dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{ {
struct vring_virtqueue *vq = to_vvq(_vq); struct vring_virtqueue *vq = to_vvq(_vq);
return vq->vring.avail; BUG_ON(!vq->we_own_ring);
return vq->queue_dma_addr;
} }
EXPORT_SYMBOL_GPL(virtqueue_get_avail); EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
void *virtqueue_get_used(struct virtqueue *_vq) dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{ {
struct vring_virtqueue *vq = to_vvq(_vq); struct vring_virtqueue *vq = to_vvq(_vq);
return vq->vring.used; BUG_ON(!vq->we_own_ring);
return vq->queue_dma_addr +
((char *)vq->vring.avail - (char *)vq->vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
/*
 * Return the DMA address of the used ring: the queue's base DMA address
 * plus the byte offset of the used ring from the descriptor table.
 * BUGs if the ring is not marked we_own_ring.
 */
dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	char *ring_base;
	char *used_base;

	BUG_ON(!vq->we_own_ring);

	ring_base = (char *)vq->vring.desc;
	used_base = (char *)vq->vring.used;

	return vq->queue_dma_addr + (used_base - ring_base);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
return &to_vvq(vq)->vring;
} }
EXPORT_SYMBOL_GPL(virtqueue_get_used); EXPORT_SYMBOL_GPL(virtqueue_get_vring);
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
...@@ -70,6 +70,8 @@ struct dma_map_ops { ...@@ -70,6 +70,8 @@ struct dma_map_ops {
int is_phys; int is_phys;
}; };
extern struct dma_map_ops dma_noop_ops;
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
#define DMA_MASK_NONE 0x0ULL #define DMA_MASK_NONE 0x0ULL
......
...@@ -75,8 +75,27 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *vq); ...@@ -75,8 +75,27 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
bool virtqueue_is_broken(struct virtqueue *vq); bool virtqueue_is_broken(struct virtqueue *vq);
void *virtqueue_get_avail(struct virtqueue *vq); const struct vring *virtqueue_get_vring(struct virtqueue *vq);
void *virtqueue_get_used(struct virtqueue *vq); dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq);
dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq);
dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
/*
* Legacy accessors -- in almost all cases, these are the wrong functions
* to use.
*/
static inline void *virtqueue_get_desc(struct virtqueue *vq)
{
return virtqueue_get_vring(vq)->desc;
}
static inline void *virtqueue_get_avail(struct virtqueue *vq)
{
return virtqueue_get_vring(vq)->avail;
}
static inline void *virtqueue_get_used(struct virtqueue *vq)
{
return virtqueue_get_vring(vq)->used;
}
/** /**
* virtio_device - representation of a device using virtio * virtio_device - representation of a device using virtio
......
...@@ -59,6 +59,35 @@ static inline void virtio_store_mb(bool weak_barriers, ...@@ -59,6 +59,35 @@ static inline void virtio_store_mb(bool weak_barriers,
struct virtio_device; struct virtio_device;
struct virtqueue; struct virtqueue;
/*
* Creates a virtqueue and allocates the descriptor ring. If
* may_reduce_num is set, then this may allocate a smaller ring than
* expected. The caller should query virtqueue_get_vring_size to learn
* the actual size of the ring.
*/
struct virtqueue *vring_create_virtqueue(unsigned int index,
unsigned int num,
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
bool may_reduce_num,
bool (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq),
const char *name);
/* Creates a virtqueue with a custom layout. */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
struct vring vring,
struct virtio_device *vdev,
bool weak_barriers,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name);
/*
* Creates a virtqueue with a standard layout but a caller-allocated
* ring.
*/
struct virtqueue *vring_new_virtqueue(unsigned int index, struct virtqueue *vring_new_virtqueue(unsigned int index,
unsigned int num, unsigned int num,
unsigned int vring_align, unsigned int vring_align,
...@@ -68,7 +97,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, ...@@ -68,7 +97,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
bool (*notify)(struct virtqueue *vq), bool (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name); const char *name);
/*
* Destroys a virtqueue. If created with vring_create_virtqueue, this
* also frees the ring.
*/
void vring_del_virtqueue(struct virtqueue *vq); void vring_del_virtqueue(struct virtqueue *vq);
/* Filter out transport-specific feature bits. */ /* Filter out transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev); void vring_transport_features(struct virtio_device *vdev);
......
...@@ -126,6 +126,12 @@ struct vhost_memory { ...@@ -126,6 +126,12 @@ struct vhost_memory {
#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
/* Set eventfd to signal an error */ /* Set eventfd to signal an error */
#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
/* Set busy loop timeout (in us) */
#define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23, \
struct vhost_vring_state)
/* Get busy loop timeout (in us) */
#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \
struct vhost_vring_state)
/* VHOST_NET specific defines */ /* VHOST_NET specific defines */
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
#ifndef VIRTIO_BLK_NO_LEGACY #ifndef VIRTIO_BLK_NO_LEGACY
#define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ #define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */
#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */
#define VIRTIO_BLK_F_WCE 9 /* Writeback mode enabled after reset */ #define VIRTIO_BLK_F_FLUSH 9 /* Flush command supported */
#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ #define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */
#ifndef __KERNEL__ #ifndef __KERNEL__
/* Old (deprecated) name for VIRTIO_BLK_F_WCE. */ /* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
#define VIRTIO_BLK_F_FLUSH VIRTIO_BLK_F_WCE #define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
#endif #endif
#endif /* !VIRTIO_BLK_NO_LEGACY */ #endif /* !VIRTIO_BLK_NO_LEGACY */
......
...@@ -18,6 +18,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ ...@@ -18,6 +18,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_SMP) += cpumask.o
lib-$(CONFIG_HAS_DMA) += dma-noop.o
lib-y += kobject.o klist.o lib-y += kobject.o klist.o
obj-y += lockref.o obj-y += lockref.o
......
/*
* lib/dma-noop.c
*
* Simple DMA noop-ops that map 1:1 with memory
*/
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
/*
 * Allocate enough whole pages to hold @size bytes and report their
 * physical address through @dma_handle.  Returns the CPU address, or
 * NULL (leaving *@dma_handle untouched) if no pages could be had.
 * @dev and @attrs are unused.
 */
static void *dma_noop_alloc(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t gfp,
			    struct dma_attrs *attrs)
{
	void *cpu_addr = (void *)__get_free_pages(gfp, get_order(size));

	if (!cpu_addr)
		return NULL;

	*dma_handle = virt_to_phys(cpu_addr);
	return cpu_addr;
}
/*
 * Give back the pages behind @cpu_addr, using the same page order that
 * get_order(@size) yields in dma_noop_alloc().  @dev, @dma_addr and
 * @attrs are unused.
 */
static void dma_noop_free(struct device *dev, size_t size,
			  void *cpu_addr, dma_addr_t dma_addr,
			  struct dma_attrs *attrs)
{
	unsigned long addr = (unsigned long)cpu_addr;

	free_pages(addr, get_order(size));
}
/*
 * Map @offset within @page by returning its physical address directly;
 * no translation is performed.  @dev, @size, @dir and @attrs are unused.
 */
static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
struct dma_attrs *attrs)
{
return page_to_phys(page) + offset;
}
/*
 * Map a scatterlist 1:1: each entry's DMA address becomes the physical
 * address of its buffer and its DMA length becomes its length.  BUGs on
 * an entry without a page.  Always returns @nents.  @dev, @dir and
 * @attrs are unused.
 */
static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
			   enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct scatterlist *entry;
	int idx;

	for_each_sg(sgl, entry, nents, idx) {
		BUG_ON(!sg_page(entry));

		sg_dma_address(entry) = (dma_addr_t)virt_to_phys(sg_virt(entry));
		sg_dma_len(entry) = entry->length;
	}

	return nents;
}
/* Always returns 0, whatever @dev and @dma_addr are. */
static int dma_noop_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return 0;
}
/* Always returns 1; @dev and @mask are not examined. */
static int dma_noop_supported(struct device *dev, u64 mask)
{
return 1;
}
/*
 * Operations table wiring the dma_noop_* helpers of this file into a
 * struct dma_map_ops.  Hooks not named here are left zero-initialized.
 */
struct dma_map_ops dma_noop_ops = {
.alloc = dma_noop_alloc,
.free = dma_noop_free,
.map_page = dma_noop_map_page,
.map_sg = dma_noop_map_sg,
.mapping_error = dma_noop_mapping_error,
.dma_supported = dma_noop_supported,
};
EXPORT_SYMBOL(dma_noop_ops);
#ifndef _LINUX_DMA_MAPPING_H
#define _LINUX_DMA_MAPPING_H
#ifdef CONFIG_HAS_DMA
# error Virtio userspace code does not support CONFIG_HAS_DMA
#endif
#define PCI_DMA_BUS_IS_PHYS 1
/*
 * Stand-in definition of the DMA direction constants for the virtio
 * userspace build (this header refuses to compile with CONFIG_HAS_DMA).
 * NOTE(review): values presumably mirror the kernel's own enum -- confirm.
 */
enum dma_data_direction {
DMA_BIDIRECTIONAL = 0,
DMA_TO_DEVICE = 1,
DMA_FROM_DEVICE = 2,
DMA_NONE = 3,
};
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment