Commit 0803e040 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio/vhost updates from Michael Tsirkin:

 - new vsock device support in host and guest

 - platform IOMMU support in host and guest, including compatibility
   quirks for legacy systems.

 - misc fixes and cleanups.

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  VSOCK: Use kvfree()
  vhost: split out vringh Kconfig
  vhost: detect 32 bit integer wrap around
  vhost: new device IOTLB API
  vhost: drop vringh dependency
  vhost: convert pre sorted vhost memory array to interval tree
  vhost: introduce vhost memory accessors
  VSOCK: Add Makefile and Kconfig
  VSOCK: Introduce vhost_vsock.ko
  VSOCK: Introduce virtio_transport.ko
  VSOCK: Introduce virtio_vsock_common.ko
  VSOCK: defer sock removal to transports
  VSOCK: transport-specific vsock_transport functions
  vhost: drop vringh dependency
  vop: pull in vhost Kconfig
  virtio: new feature to detect IOMMU device quirk
  balloon: check the number of available pages in leak balloon
  vhost: lockless enqueuing
  vhost: simplify work flushing
parents 80fac0f5 b226acab
......@@ -12419,6 +12419,19 @@ S: Maintained
F: drivers/media/v4l2-core/videobuf2-*
F: include/media/videobuf2-*
VIRTIO AND VHOST VSOCK DRIVER
M: Stefan Hajnoczi <stefanha@redhat.com>
L: kvm@vger.kernel.org
L: virtualization@lists.linux-foundation.org
L: netdev@vger.kernel.org
S: Maintained
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
F: drivers/vhost/vsock.c
F: drivers/vhost/vsock.h
VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com>
S: Maintained
......
......@@ -138,6 +138,7 @@ obj-$(CONFIG_OF) += of/
obj-$(CONFIG_SSB) += ssb/
obj-$(CONFIG_BCMA) += bcma/
obj-$(CONFIG_VHOST_RING) += vhost/
obj-$(CONFIG_VHOST) += vhost/
obj-$(CONFIG_VLYNQ) += vlynq/
obj-$(CONFIG_STAGING) += staging/
obj-y += platform/
......
......@@ -146,3 +146,7 @@ config VOP
More information about the Intel MIC family as well as the Linux
OS and tools for MIC to use with this driver are available from
<http://software.intel.com/en-us/mic-developer>.
if VOP
source "drivers/vhost/Kconfig.vringh"
endif
......@@ -52,5 +52,5 @@ config CAIF_VIRTIO
The caif driver for CAIF over Virtio.
if CAIF_VIRTIO
source "drivers/vhost/Kconfig"
source "drivers/vhost/Kconfig.vringh"
endif
......@@ -2,7 +2,6 @@ config VHOST_NET
tristate "Host kernel accelerator for virtio net"
depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
select VHOST
select VHOST_RING
---help---
This kernel module can be loaded in host kernel to accelerate
guest networking with virtio_net. Not to be confused with virtio_net
......@@ -15,17 +14,24 @@ config VHOST_SCSI
tristate "VHOST_SCSI TCM fabric driver"
depends on TARGET_CORE && EVENTFD && m
select VHOST
select VHOST_RING
default n
---help---
Say M here to enable the vhost_scsi TCM fabric module
for use with virtio-scsi guests
config VHOST_RING
tristate
config VHOST_VSOCK
tristate "vhost virtio-vsock driver"
depends on VSOCKETS && EVENTFD
select VIRTIO_VSOCKETS_COMMON
select VHOST
default n
---help---
This option is selected by any driver which needs to access
the host side of a virtio ring.
This kernel module can be loaded in the host kernel to provide AF_VSOCK
sockets for communicating with guests. The guests must have the
virtio_transport.ko driver loaded to use the virtio-vsock device.
To compile this driver as a module, choose M here: the module will be called
vhost_vsock.
config VHOST
tristate
......
config VHOST_RING
tristate
---help---
This option is selected by any driver which needs to access
the host side of a virtio ring.
......@@ -4,5 +4,9 @@ vhost_net-y := net.o
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
vhost_scsi-y := scsi.o
obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o
vhost_vsock-y := vsock.o
obj-$(CONFIG_VHOST_RING) += vringh.o
obj-$(CONFIG_VHOST) += vhost.o
......@@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
enum {
VHOST_NET_FEATURES = VHOST_FEATURES |
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
(1ULL << VIRTIO_NET_F_MRG_RXBUF)
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
(1ULL << VIRTIO_F_IOMMU_PLATFORM)
};
enum {
......@@ -334,7 +335,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
{
unsigned long uninitialized_var(endtime);
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
out_num, in_num, NULL, NULL);
if (r == vq->num && vq->busyloop_timeout) {
preempt_disable();
......@@ -344,7 +345,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
cpu_relax_lowlatency();
preempt_enable();
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
out_num, in_num, NULL, NULL);
}
return r;
......@@ -377,6 +378,9 @@ static void handle_tx(struct vhost_net *net)
if (!sock)
goto out;
if (!vq_iotlb_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq);
hdr_size = nvq->vhost_hlen;
......@@ -652,6 +656,10 @@ static void handle_rx(struct vhost_net *net)
sock = vq->private_data;
if (!sock)
goto out;
if (!vq_iotlb_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq);
vhost_net_disable_vq(net, vq);
......@@ -1052,20 +1060,20 @@ static long vhost_net_reset_owner(struct vhost_net *n)
struct socket *tx_sock = NULL;
struct socket *rx_sock = NULL;
long err;
struct vhost_memory *memory;
struct vhost_umem *umem;
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
if (err)
goto done;
memory = vhost_dev_reset_owner_prepare();
if (!memory) {
umem = vhost_dev_reset_owner_prepare();
if (!umem) {
err = -ENOMEM;
goto done;
}
vhost_net_stop(n, &tx_sock, &rx_sock);
vhost_net_flush(n);
vhost_dev_reset_owner(&n->dev, memory);
vhost_dev_reset_owner(&n->dev, umem);
vhost_net_vq_reset(n);
done:
mutex_unlock(&n->dev.mutex);
......@@ -1096,10 +1104,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
}
mutex_lock(&n->dev.mutex);
if ((features & (1 << VHOST_F_LOG_ALL)) &&
!vhost_log_access_ok(&n->dev)) {
mutex_unlock(&n->dev.mutex);
return -EFAULT;
!vhost_log_access_ok(&n->dev))
goto out_unlock;
if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
if (vhost_init_device_iotlb(&n->dev, true))
goto out_unlock;
}
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vq.acked_features = features;
......@@ -1109,6 +1121,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
}
mutex_unlock(&n->dev.mutex);
return 0;
out_unlock:
mutex_unlock(&n->dev.mutex);
return -EFAULT;
}
static long vhost_net_set_owner(struct vhost_net *n)
......@@ -1182,9 +1198,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
}
#endif
static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct vhost_net *n = file->private_data;
struct vhost_dev *dev = &n->dev;
int noblock = file->f_flags & O_NONBLOCK;
return vhost_chr_read_iter(dev, to, noblock);
}
static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb,
struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct vhost_net *n = file->private_data;
struct vhost_dev *dev = &n->dev;
return vhost_chr_write_iter(dev, from);
}
static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait)
{
struct vhost_net *n = file->private_data;
struct vhost_dev *dev = &n->dev;
return vhost_chr_poll(file, dev, wait);
}
static const struct file_operations vhost_net_fops = {
.owner = THIS_MODULE,
.release = vhost_net_release,
.read_iter = vhost_net_chr_read_iter,
.write_iter = vhost_net_chr_write_iter,
.poll = vhost_net_chr_poll,
.unlocked_ioctl = vhost_net_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = vhost_net_compat_ioctl,
......
This diff is collapsed.
......@@ -15,13 +15,15 @@
struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work);
#define VHOST_WORK_QUEUED 1
struct vhost_work {
struct list_head node;
struct llist_node node;
vhost_work_fn_t fn;
wait_queue_head_t done;
int flushing;
unsigned queue_seq;
unsigned done_seq;
unsigned long flags;
};
/* Poll a file (eventfd or socket) */
......@@ -53,6 +55,27 @@ struct vhost_log {
u64 len;
};
#define START(node) ((node)->start)
#define LAST(node) ((node)->last)
struct vhost_umem_node {
struct rb_node rb;
struct list_head link;
__u64 start;
__u64 last;
__u64 size;
__u64 userspace_addr;
__u32 perm;
__u32 flags_padding;
__u64 __subtree_last;
};
struct vhost_umem {
struct rb_root umem_tree;
struct list_head umem_list;
int numem;
};
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
......@@ -98,10 +121,12 @@ struct vhost_virtqueue {
u64 log_addr;
struct iovec iov[UIO_MAXIOV];
struct iovec iotlb_iov[64];
struct iovec *indirect;
struct vring_used_elem *heads;
/* Protected by virtqueue mutex. */
struct vhost_memory *memory;
struct vhost_umem *umem;
struct vhost_umem *iotlb;
void *private_data;
u64 acked_features;
/* Log write descriptors */
......@@ -118,25 +143,35 @@ struct vhost_virtqueue {
u32 busyloop_timeout;
};
struct vhost_msg_node {
struct vhost_msg msg;
struct vhost_virtqueue *vq;
struct list_head node;
};
struct vhost_dev {
struct vhost_memory *memory;
struct mm_struct *mm;
struct mutex mutex;
struct vhost_virtqueue **vqs;
int nvqs;
struct file *log_file;
struct eventfd_ctx *log_ctx;
spinlock_t work_lock;
struct list_head work_list;
struct llist_head work_list;
struct task_struct *worker;
struct vhost_umem *umem;
struct vhost_umem *iotlb;
spinlock_t iotlb_lock;
struct list_head read_list;
struct list_head pending_list;
wait_queue_head_t wait;
};
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
long vhost_dev_set_owner(struct vhost_dev *dev);
bool vhost_dev_has_owner(struct vhost_dev *dev);
long vhost_dev_check_owner(struct vhost_dev *);
struct vhost_memory *vhost_dev_reset_owner_prepare(void);
void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *);
struct vhost_umem *vhost_dev_reset_owner_prepare(void);
void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *);
void vhost_dev_cleanup(struct vhost_dev *, bool locked);
void vhost_dev_stop(struct vhost_dev *);
long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
......@@ -165,6 +200,21 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
unsigned int log_num, u64 len);
int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);
void vhost_enqueue_msg(struct vhost_dev *dev,
struct list_head *head,
struct vhost_msg_node *node);
struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
struct list_head *head);
unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev,
poll_table *wait);
ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
int noblock);
ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
struct iov_iter *from);
int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
#define vq_err(vq, fmt, ...) do { \
pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
......
This diff is collapsed.
......@@ -207,6 +207,8 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
num = min(num, ARRAY_SIZE(vb->pfns));
mutex_lock(&vb->balloon_lock);
/* We can't release more pages than taken */
num = min(num, (size_t)vb->num_pages);
for (vb->num_pfns = 0; vb->num_pfns < num;
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
page = balloon_page_dequeue(vb_dev_info);
......
......@@ -117,7 +117,10 @@ struct vring_virtqueue {
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
/*
* The interaction between virtio and a possible IOMMU is a mess.
* Modern virtio devices have feature bits to specify whether they need a
* quirk and bypass the IOMMU. If not there, just use the DMA API.
*
* If there, the interaction between virtio and DMA API is messy.
*
* On most systems with virtio, physical addresses match bus addresses,
* and it doesn't particularly matter whether we use the DMA API.
......@@ -133,10 +136,18 @@ struct vring_virtqueue {
*
* For the time being, we preserve historic behavior and bypass the DMA
* API.
*
* TODO: install a per-device DMA ops structure that does the right thing
* taking into account all the above quirks, and use the DMA API
* unconditionally on data path.
*/
static bool vring_use_dma_api(struct virtio_device *vdev)
{
if (!virtio_has_iommu_quirk(vdev))
return true;
/* Otherwise, we are left to guess. */
/*
* In theory, it's possible to have a buggy QEMU-supposed
* emulated Q35 IOMMU and Xen enabled at the same time. On
......@@ -1099,6 +1110,8 @@ void vring_transport_features(struct virtio_device *vdev)
break;
case VIRTIO_F_VERSION_1:
break;
case VIRTIO_F_IOMMU_PLATFORM:
break;
default:
/* We don't understand this bit. */
__virtio_clear_bit(vdev, i);
......
......@@ -149,6 +149,19 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
return __virtio_test_bit(vdev, fbit);
}
/**
* virtio_has_iommu_quirk - determine whether this device has the iommu quirk
* @vdev: the device
*/
static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
{
/*
* Note the reverse polarity of the quirk feature (compared to most
* other features), this is for compatibility with legacy systems.
*/
return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
}
static inline
struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
vq_callback_t *c, const char *n)
......
#ifndef _LINUX_VIRTIO_VSOCK_H
#define _LINUX_VIRTIO_VSOCK_H
#include <uapi/linux/virtio_vsock.h>
#include <linux/socket.h>
#include <net/sock.h>
#include <net/af_vsock.h>
#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128
#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256)
#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256)
#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4)
#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL
#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64)
enum {
VSOCK_VQ_RX = 0, /* for host to guest data */
VSOCK_VQ_TX = 1, /* for guest to host data */
VSOCK_VQ_EVENT = 2,
VSOCK_VQ_MAX = 3,
};
/* Per-socket state (accessed via vsk->trans) */
struct virtio_vsock_sock {
struct vsock_sock *vsk;
/* Protected by lock_sock(sk_vsock(trans->vsk)) */
u32 buf_size;
u32 buf_size_min;
u32 buf_size_max;
spinlock_t tx_lock;
spinlock_t rx_lock;
/* Protected by tx_lock */
u32 tx_cnt;
u32 buf_alloc;
u32 peer_fwd_cnt;
u32 peer_buf_alloc;
/* Protected by rx_lock */
u32 fwd_cnt;
u32 rx_bytes;
struct list_head rx_queue;
};
struct virtio_vsock_pkt {
struct virtio_vsock_hdr hdr;
struct work_struct work;
struct list_head list;
void *buf;
u32 len;
u32 off;
bool reply;
};
struct virtio_vsock_pkt_info {
u32 remote_cid, remote_port;
struct msghdr *msg;
u32 pkt_len;
u16 type;
u16 op;
u32 flags;
bool reply;
};
struct virtio_transport {
/* This must be the first field */
struct vsock_transport transport;
/* Takes ownership of the packet */
int (*send_pkt)(struct virtio_vsock_pkt *pkt);
};
ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len,
int type);
int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len, int flags);
s64 virtio_transport_stream_has_data(struct vsock_sock *vsk);
s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
int virtio_transport_do_socket_init(struct vsock_sock *vsk,
struct vsock_sock *psk);
u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk);
u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk);
u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk);
void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val);
void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val);
void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val);
int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
size_t target,
bool *data_ready_now);
int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
size_t target,
bool *space_available_now);
int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
size_t target, struct vsock_transport_recv_notify_data *data);
int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
size_t target, struct vsock_transport_recv_notify_data *data);
int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
size_t target, struct vsock_transport_recv_notify_data *data);
int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
size_t target, ssize_t copied, bool data_read,
struct vsock_transport_recv_notify_data *data);
int virtio_transport_notify_send_init(struct vsock_sock *vsk,
struct vsock_transport_send_notify_data *data);
int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
struct vsock_transport_send_notify_data *data);
int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
struct vsock_transport_send_notify_data *data);
int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
ssize_t written, struct vsock_transport_send_notify_data *data);
u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk);
bool virtio_transport_stream_is_active(struct vsock_sock *vsk);
bool virtio_transport_stream_allow(u32 cid, u32 port);
int virtio_transport_dgram_bind(struct vsock_sock *vsk,
struct sockaddr_vm *addr);
bool virtio_transport_dgram_allow(u32 cid, u32 port);
int virtio_transport_connect(struct vsock_sock *vsk);
int virtio_transport_shutdown(struct vsock_sock *vsk, int mode);
void virtio_transport_release(struct vsock_sock *vsk);
ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len);
int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
struct sockaddr_vm *remote_addr,
struct msghdr *msg,
size_t len);
void virtio_transport_destruct(struct vsock_sock *vsk);
void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt);
void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt);
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt);
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted);
void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit);
#endif /* _LINUX_VIRTIO_VSOCK_H */
......@@ -63,6 +63,8 @@ struct vsock_sock {
struct list_head accept_queue;
bool rejected;
struct delayed_work dwork;
struct delayed_work close_work;
bool close_work_scheduled;
u32 peer_shutdown;
bool sent_request;
bool ignore_connecting_rst;
......@@ -165,6 +167,9 @@ static inline int vsock_core_init(const struct vsock_transport *t)
}
void vsock_core_exit(void);
/* The transport may downcast this to access transport-specific functions */
const struct vsock_transport *vsock_core_get_transport(void);
/**** UTILS ****/
void vsock_release_pending(struct sock *pending);
......@@ -177,6 +182,7 @@ void vsock_remove_connected(struct vsock_sock *vsk);
struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
struct sockaddr_vm *dst);
void vsock_remove_sock(struct vsock_sock *vsk);
void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
#endif /* __AF_VSOCK_H__ */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM vsock
#if !defined(_TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H) || \
defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H
#include <linux/tracepoint.h>
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM);
#define show_type(val) \
__print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" })
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RESPONSE);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RST);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SHUTDOWN);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RW);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_UPDATE);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_REQUEST);
#define show_op(val) \
__print_symbolic(val, \
{ VIRTIO_VSOCK_OP_INVALID, "INVALID" }, \
{ VIRTIO_VSOCK_OP_REQUEST, "REQUEST" }, \
{ VIRTIO_VSOCK_OP_RESPONSE, "RESPONSE" }, \
{ VIRTIO_VSOCK_OP_RST, "RST" }, \
{ VIRTIO_VSOCK_OP_SHUTDOWN, "SHUTDOWN" }, \
{ VIRTIO_VSOCK_OP_RW, "RW" }, \
{ VIRTIO_VSOCK_OP_CREDIT_UPDATE, "CREDIT_UPDATE" }, \
{ VIRTIO_VSOCK_OP_CREDIT_REQUEST, "CREDIT_REQUEST" })
TRACE_EVENT(virtio_transport_alloc_pkt,
TP_PROTO(
__u32 src_cid, __u32 src_port,
__u32 dst_cid, __u32 dst_port,
__u32 len,
__u16 type,
__u16 op,
__u32 flags
),
TP_ARGS(
src_cid, src_port,
dst_cid, dst_port,
len,
type,
op,
flags
),
TP_STRUCT__entry(
__field(__u32, src_cid)
__field(__u32, src_port)
__field(__u32, dst_cid)
__field(__u32, dst_port)
__field(__u32, len)
__field(__u16, type)
__field(__u16, op)
__field(__u32, flags)
),
TP_fast_assign(
__entry->src_cid = src_cid;
__entry->src_port = src_port;
__entry->dst_cid = dst_cid;
__entry->dst_port = dst_port;
__entry->len = len;
__entry->type = type;
__entry->op = op;
__entry->flags = flags;
),
TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x",
__entry->src_cid, __entry->src_port,
__entry->dst_cid, __entry->dst_port,
__entry->len,
show_type(__entry->type),
show_op(__entry->op),
__entry->flags)
);
TRACE_EVENT(virtio_transport_recv_pkt,
TP_PROTO(
__u32 src_cid, __u32 src_port,
__u32 dst_cid, __u32 dst_port,
__u32 len,
__u16 type,
__u16 op,
__u32 flags,
__u32 buf_alloc,
__u32 fwd_cnt
),
TP_ARGS(
src_cid, src_port,
dst_cid, dst_port,
len,
type,
op,
flags,
buf_alloc,
fwd_cnt
),
TP_STRUCT__entry(
__field(__u32, src_cid)
__field(__u32, src_port)
__field(__u32, dst_cid)
__field(__u32, dst_port)
__field(__u32, len)
__field(__u16, type)
__field(__u16, op)
__field(__u32, flags)
__field(__u32, buf_alloc)
__field(__u32, fwd_cnt)
),
TP_fast_assign(
__entry->src_cid = src_cid;
__entry->src_port = src_port;
__entry->dst_cid = dst_cid;
__entry->dst_port = dst_port;
__entry->len = len;
__entry->type = type;
__entry->op = op;
__entry->flags = flags;
__entry->buf_alloc = buf_alloc;
__entry->fwd_cnt = fwd_cnt;
),
TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x "
"buf_alloc=%u fwd_cnt=%u",
__entry->src_cid, __entry->src_port,
__entry->dst_cid, __entry->dst_port,
__entry->len,
show_type(__entry->type),
show_op(__entry->op),
__entry->flags,
__entry->buf_alloc,
__entry->fwd_cnt)
);
#endif /* _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H */
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE vsock_virtio_transport_common
/* This part must be outside protection */
#include <trace/define_trace.h>
......@@ -454,6 +454,7 @@ header-y += virtio_ring.h
header-y += virtio_rng.h
header-y += virtio_scsi.h
header-y += virtio_types.h
header-y += virtio_vsock.h
header-y += vm_sockets.h
header-y += vt.h
header-y += vtpm_proxy.h
......
......@@ -47,6 +47,32 @@ struct vhost_vring_addr {
__u64 log_guest_addr;
};
/* no alignment requirement */
struct vhost_iotlb_msg {
__u64 iova;
__u64 size;
__u64 uaddr;
#define VHOST_ACCESS_RO 0x1
#define VHOST_ACCESS_WO 0x2
#define VHOST_ACCESS_RW 0x3
__u8 perm;
#define VHOST_IOTLB_MISS 1
#define VHOST_IOTLB_UPDATE 2
#define VHOST_IOTLB_INVALIDATE 3
#define VHOST_IOTLB_ACCESS_FAIL 4
__u8 type;
};
#define VHOST_IOTLB_MSG 0x1
struct vhost_msg {
int type;
union {
struct vhost_iotlb_msg iotlb;
__u8 padding[64];
};
};
struct vhost_memory_region {
__u64 guest_phys_addr;
__u64 memory_size; /* bytes */
......@@ -146,6 +172,8 @@ struct vhost_memory {
#define VHOST_F_LOG_ALL 26
/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
#define VHOST_NET_F_VIRTIO_NET_HDR 27
/* Vhost have device IOTLB */
#define VHOST_F_DEVICE_IOTLB 63
/* VHOST_SCSI specific definitions */
......@@ -175,4 +203,9 @@ struct vhost_scsi_target {
#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
/* VHOST_VSOCK specific defines */
#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
#endif
......@@ -49,7 +49,7 @@
* transport being used (eg. virtio_ring), the rest are per-device feature
* bits. */
#define VIRTIO_TRANSPORT_F_START 28
#define VIRTIO_TRANSPORT_F_END 33
#define VIRTIO_TRANSPORT_F_END 34
#ifndef VIRTIO_CONFIG_NO_LEGACY
/* Do we get callbacks when the ring is completely used, even if we've
......@@ -63,4 +63,12 @@
/* v1.0 compliant. */
#define VIRTIO_F_VERSION_1 32
/*
* If clear - device has the IOMMU bypass quirk feature.
* If set - use platform tools to detect the IOMMU.
*
* Note the reverse polarity (compared to most other features),
* this is for compatibility with legacy systems.
*/
#define VIRTIO_F_IOMMU_PLATFORM 33
#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
......@@ -41,5 +41,6 @@
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#endif /* _LINUX_VIRTIO_IDS_H */
/*
* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
* anyone can use the definitions to implement compatible drivers/servers:
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Copyright (C) Red Hat, Inc., 2013-2015
* Copyright (C) Asias He <asias@redhat.com>, 2013
* Copyright (C) Stefan Hajnoczi <stefanha@redhat.com>, 2015
*/
#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H
#define _UAPI_LINUX_VIRTIO_VOSCK_H
#include <linux/types.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
struct virtio_vsock_config {
__le64 guest_cid;
} __attribute__((packed));
enum virtio_vsock_event_id {
VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0,
};
struct virtio_vsock_event {
__le32 id;
} __attribute__((packed));
struct virtio_vsock_hdr {
__le64 src_cid;
__le64 dst_cid;
__le32 src_port;
__le32 dst_port;
__le32 len;
__le16 type; /* enum virtio_vsock_type */
__le16 op; /* enum virtio_vsock_op */
__le32 flags;
__le32 buf_alloc;
__le32 fwd_cnt;
} __attribute__((packed));
enum virtio_vsock_type {
VIRTIO_VSOCK_TYPE_STREAM = 1,
};
enum virtio_vsock_op {
VIRTIO_VSOCK_OP_INVALID = 0,
/* Connect operations */
VIRTIO_VSOCK_OP_REQUEST = 1,
VIRTIO_VSOCK_OP_RESPONSE = 2,
VIRTIO_VSOCK_OP_RST = 3,
VIRTIO_VSOCK_OP_SHUTDOWN = 4,
/* To send payload */
VIRTIO_VSOCK_OP_RW = 5,
/* Tell the peer our credit info */
VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6,
/* Request the peer to send the credit info to us */
VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7,
};
/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */
enum virtio_vsock_shutdown {
VIRTIO_VSOCK_SHUTDOWN_RCV = 1,
VIRTIO_VSOCK_SHUTDOWN_SEND = 2,
};
#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */
......@@ -26,3 +26,23 @@ config VMWARE_VMCI_VSOCKETS
To compile this driver as a module, choose M here: the module
will be called vmw_vsock_vmci_transport. If unsure, say N.
config VIRTIO_VSOCKETS
tristate "virtio transport for Virtual Sockets"
depends on VSOCKETS && VIRTIO
select VIRTIO_VSOCKETS_COMMON
help
This module implements a virtio transport for Virtual Sockets.
Enable this transport if your Virtual Machine host supports Virtual
Sockets over virtio.
To compile this driver as a module, choose M here: the module will be
called vmw_vsock_virtio_transport. If unsure, say N.
config VIRTIO_VSOCKETS_COMMON
tristate
help
This option is selected by any driver which needs to access
the virtio_vsock. The module will be called
vmw_vsock_virtio_transport_common.
obj-$(CONFIG_VSOCKETS) += vsock.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
vsock-y += af_vsock.o vsock_addr.o
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o
vmw_vsock_virtio_transport-y += virtio_transport.o
vmw_vsock_virtio_transport_common-y += virtio_transport_common.o
......@@ -344,6 +344,16 @@ static bool vsock_in_connected_table(struct vsock_sock *vsk)
return ret;
}
void vsock_remove_sock(struct vsock_sock *vsk)
{
if (vsock_in_bound_table(vsk))
vsock_remove_bound(vsk);
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
{
int i;
......@@ -660,12 +670,6 @@ static void __vsock_release(struct sock *sk)
vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */
if (vsock_in_bound_table(vsk))
vsock_remove_bound(vsk);
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
transport->release(vsk);
lock_sock(sk);
......@@ -1995,6 +1999,15 @@ void vsock_core_exit(void)
}
EXPORT_SYMBOL_GPL(vsock_core_exit);
const struct vsock_transport *vsock_core_get_transport(void)
{
/* vsock_register_mutex not taken since only the transport uses this
* function and only while registered.
*/
return transport;
}
EXPORT_SYMBOL_GPL(vsock_core_get_transport);
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
MODULE_VERSION("1.0.1.0-k");
......
This diff is collapsed.
This diff is collapsed.
......@@ -1644,6 +1644,8 @@ static void vmci_transport_destruct(struct vsock_sock *vsk)
static void vmci_transport_release(struct vsock_sock *vsk)
{
vsock_remove_sock(vsk);
if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment