Commit ced7b713 authored by Arseny Krasnov, committed by David S. Miller

vhost/vsock: support SEQPACKET for transport

When a received packet is copied to the guest's rx queue, the data buffers
of the rx queue may be smaller than the data buffer of the input packet,
so the data of the input packet is copied into each rx buffer; each rx
buffer thus becomes a packet with a dynamically created header. The fields
of such a header are initialized from the header of the input packet
(except the length field, whose value depends on the number of bytes
copied to the rx buffer). In the SEQPACKET case, we also need to take care
of the record delimiter bit: if the input packet has this bit set, we don't
copy it to the header of the packet in the rx buffer, except when that
rx buffer is the last part of the input packet. Otherwise, we would get a
sequence of packets with the delimiter bit set, thus breaking record
boundaries.
Also stop ignoring non-stream packet types, and handle the SEQPACKET
feature bit.
Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 53efbba1
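For context, here is a minimal userspace sketch of what this transport support enables from inside a guest: a SOCK_SEQPACKET vsock socket whose message boundaries survive even when the host has to split a record across several rx buffers. The destination port is a placeholder and error handling is abbreviated; this is an illustration under those assumptions, not part of the patch.

```c
/* Minimal SOCK_SEQPACKET-over-vsock client sketch (not part of the patch).
 * The port is a placeholder value; error handling is abbreviated.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid    = VMADDR_CID_HOST,  /* talk to the host */
		.svm_port   = 1234,             /* placeholder port */
	};
	char msg[] = "one record, delivered as one record";
	int fd;

	fd = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
	if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("vsock seqpacket");
		return 1;
	}

	/* Each send() is one record: even if the transport splits it across
	 * several rx buffers, only the last buffer carries the EOR marker,
	 * so the receiver sees exactly one message boundary.
	 */
	send(fd, msg, sizeof(msg), 0);
	close(fd);
	return 0;
}
```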
@@ -31,7 +31,8 @@
 enum {
 	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
-			       (1ULL << VIRTIO_F_ACCESS_PLATFORM)
+			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
+			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
 };
 
 enum {
@@ -56,6 +57,7 @@ struct vhost_vsock {
 	atomic_t queued_replies;
 
 	u32 guest_cid;
+	bool seqpacket_allow;
 };
 
 static u32 vhost_transport_get_local_cid(void)
@@ -112,6 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		size_t nbytes;
 		size_t iov_len, payload_len;
 		int head;
+		bool restore_flag = false;
 
 		spin_lock_bh(&vsock->send_pkt_list_lock);
 		if (list_empty(&vsock->send_pkt_list)) {
@@ -168,9 +171,26 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		/* If the packet is greater than the space available in the
 		 * buffer, we split it using multiple buffers.
 		 */
-		if (payload_len > iov_len - sizeof(pkt->hdr))
+		if (payload_len > iov_len - sizeof(pkt->hdr)) {
 			payload_len = iov_len - sizeof(pkt->hdr);
 
+			/* As we are copying pieces of a large packet's buffer
+			 * to small rx buffers, headers of packets in the rx
+			 * queue are created dynamically and are initialized
+			 * from the header of the current packet (except
+			 * length). But in the SOCK_SEQPACKET case, we must
+			 * also clear the record delimiter bit
+			 * (VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead of one
+			 * packet with the delimiter (which marks the end of
+			 * a record), there would be a sequence of packets
+			 * with the delimiter bit set. After the initialized
+			 * header has been copied to the rx buffer, this bit
+			 * is restored.
+			 */
+			if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+				pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+				restore_flag = true;
+			}
+		}
+
 		/* Set the correct length in the header */
 		pkt->hdr.len = cpu_to_le32(payload_len);
@@ -204,6 +224,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		 * to send it with the next available buffer.
 		 */
 		if (pkt->off < pkt->len) {
+			if (restore_flag)
+				pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+
 			/* We are queueing the same virtio_vsock_pkt to handle
 			 * the remaining bytes, and we want to deliver it
 			 * to monitoring devices in the next iteration.
@@ -354,7 +377,6 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		return NULL;
 	}
 
-	if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
-		pkt->len = le32_to_cpu(pkt->hdr.len);
+	pkt->len = le32_to_cpu(pkt->hdr.len);
 
 	/* No payload */
@@ -398,6 +420,8 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
 	return val < vq->num;
 }
 
+static bool vhost_transport_seqpacket_allow(u32 remote_cid);
+
 static struct virtio_transport vhost_transport = {
 	.transport = {
 		.module = THIS_MODULE,
@@ -424,6 +448,11 @@ static struct virtio_transport vhost_transport = {
 		.stream_is_active = virtio_transport_stream_is_active,
 		.stream_allow = virtio_transport_stream_allow,
 
+		.seqpacket_dequeue = virtio_transport_seqpacket_dequeue,
+		.seqpacket_enqueue = virtio_transport_seqpacket_enqueue,
+		.seqpacket_allow = vhost_transport_seqpacket_allow,
+		.seqpacket_has_data = virtio_transport_seqpacket_has_data,
+
 		.notify_poll_in = virtio_transport_notify_poll_in,
 		.notify_poll_out = virtio_transport_notify_poll_out,
 		.notify_recv_init = virtio_transport_notify_recv_init,
@@ -441,6 +470,22 @@ static struct virtio_transport vhost_transport = {
 	.send_pkt = vhost_transport_send_pkt,
 };
 
+static bool vhost_transport_seqpacket_allow(u32 remote_cid)
+{
+	struct vhost_vsock *vsock;
+	bool seqpacket_allow = false;
+
+	rcu_read_lock();
+	vsock = vhost_vsock_get(remote_cid);
+
+	if (vsock)
+		seqpacket_allow = vsock->seqpacket_allow;
+
+	rcu_read_unlock();
+
+	return seqpacket_allow;
+}
+
 static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
 {
 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -785,6 +830,9 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
 		goto err;
 	}
 
+	if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
+		vsock->seqpacket_allow = true;
+
 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
 		vq = &vsock->vqs[i];
 		mutex_lock(&vq->mutex);
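The splitting behavior handled in vhost_transport_do_send_pkt() can be modeled in a few lines of userspace C. The names below (split_into_rx_buffers, struct chunk, SEQ_EOR) are hypothetical stand-ins for the kernel structures, chosen only to mirror the clear-then-restore handling of VIRTIO_VSOCK_SEQ_EOR: intermediate chunks of a split record drop the delimiter, and only the final chunk keeps it.

```c
/* Hypothetical userspace model of the EOR handling in the patch: only the
 * last chunk of a split record keeps the end-of-record flag, mirroring how
 * vhost_transport_do_send_pkt() clears and later restores
 * VIRTIO_VSOCK_SEQ_EOR. Names here are illustrative, not kernel APIs.
 */
#include <stdbool.h>
#include <stdio.h>

#define SEQ_EOR 0x1	/* stand-in for VIRTIO_VSOCK_SEQ_EOR */

struct chunk {
	size_t len;
	unsigned int flags;
};

static size_t split_into_rx_buffers(size_t pkt_len, unsigned int pkt_flags,
				    size_t rx_buf_len, struct chunk *out,
				    size_t max_chunks)
{
	size_t off = 0, n = 0;

	while (off < pkt_len && n < max_chunks) {
		size_t payload_len = pkt_len - off;
		bool restore_flag = false;

		/* Clamp to what fits in one rx buffer, as the kernel does
		 * when iov_len is smaller than the remaining payload.
		 */
		if (payload_len > rx_buf_len) {
			payload_len = rx_buf_len;
			/* More data follows: strip the record delimiter so
			 * intermediate buffers don't end the record early.
			 */
			if (pkt_flags & SEQ_EOR) {
				pkt_flags &= ~SEQ_EOR;
				restore_flag = true;
			}
		}

		/* Dynamically created header for this rx buffer. */
		out[n].len = payload_len;
		out[n].flags = pkt_flags;
		n++;
		off += payload_len;

		/* Restore the bit on the original packet so the final
		 * chunk can carry it.
		 */
		if (restore_flag)
			pkt_flags |= SEQ_EOR;
	}

	return n;
}

int main(void)
{
	struct chunk chunks[8];
	/* A 10-byte record split into 4-byte rx buffers: only the last
	 * chunk reports eor=1.
	 */
	size_t n = split_into_rx_buffers(10, SEQ_EOR, 4, chunks, 8);

	for (size_t i = 0; i < n; i++)
		printf("chunk %zu: len=%zu eor=%u\n",
		       i, chunks[i].len, chunks[i].flags & SEQ_EOR);
	return 0;
}
```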