Commit 88ce6424, authored by Johannes Berg, committed by Richard Weinberger

um: Implement time-travel=ext

This implements synchronized time-travel mode which - using a special
application on a unix socket - lets multiple machines take part in a
time-travelling simulation together.

The protocol for the unix domain socket is defined in the new file
include/uapi/linux/um_timetravel.h.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
parent dd9ada56
......@@ -26,6 +26,7 @@
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/time-internal.h>
#include <shared/as-layout.h>
#include <irq_kern.h>
#include <init.h>
......@@ -64,6 +65,11 @@ struct virtio_uml_device {
/*
 * Per-virtqueue bookkeeping, reachable through vq->priv.
 *
 * The last three members exist only when CONFIG_UML_TIME_TRAVEL_SUPPORT
 * is enabled.
 */
struct virtio_uml_vq_info {
	int kick_fd;
	int call_fd;
	/* filled by vu_setup_vq() as "<pdev name>.<pdev id>-<queue name>" */
	char name[32];
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	/* the virtqueue this info belongs to, stored by vu_setup_vq() */
	struct virtqueue *vq;
	/* the caller's callback, invoked later by vu_defer_irq_handle() */
	vq_callback_t *callback;
	/* event handed to time_travel_add_irq_event() on each interrupt */
	struct time_travel_event defer;
#endif
};
extern unsigned long long physmem_size, highmem;
......@@ -118,10 +124,27 @@ static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
static int vhost_user_recv(struct virtio_uml_device *vu_dev,
int fd, struct vhost_user_msg *msg,
size_t max_payload_size)
size_t max_payload_size, bool wait)
{
size_t size;
int rc = vhost_user_recv_header(fd, msg);
int rc;
/*
* In virtio time-travel mode, we're handling all the vhost-user
* FDs by polling them whenever appropriate. However, we may get
* into a situation where we're sending out an interrupt message
* to a device (e.g. a net device) and need to handle a simulation
* time message while doing so, e.g. one that tells us to update
* our idea of how long we can run without scheduling.
*
* Thus, we need to not just read() from the given fd, but need
* to also handle messages for the simulation time - this function
* does that for us while waiting for the given fd to be readable.
*/
if (wait)
time_travel_wait_readable(fd);
rc = vhost_user_recv_header(fd, msg);
if (rc == -ECONNRESET && vu_dev->registered) {
struct virtio_uml_platform_data *pdata;
......@@ -143,7 +166,8 @@ static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
struct vhost_user_msg *msg,
size_t max_payload_size)
{
int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, max_payload_size);
int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
max_payload_size, true);
if (rc)
return rc;
......@@ -173,7 +197,8 @@ static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
struct vhost_user_msg *msg,
size_t max_payload_size)
{
int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, max_payload_size);
int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
max_payload_size, false);
if (rc)
return rc;
......@@ -700,6 +725,8 @@ static bool vu_notify(struct virtqueue *vq)
const uint64_t n = 1;
int rc;
time_travel_propagate_time();
if (info->kick_fd < 0) {
struct virtio_uml_device *vu_dev;
......@@ -847,6 +874,23 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
return rc;
}
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/*
 * Time-travel event handler for a deferred virtqueue interrupt.
 *
 * @d is the 'defer' member embedded in a struct virtio_uml_vq_info; the
 * containing structure is recovered from it and the callback stored there
 * is invoked on the stored virtqueue.
 */
static void vu_defer_irq_handle(struct time_travel_event *d)
{
	struct virtio_uml_vq_info *vq_info =
		container_of(d, struct virtio_uml_vq_info, defer);

	vq_info->callback(vq_info->vq);
}
/*
 * Virtqueue callback that does not do the real work itself: it only hands
 * the queue's 'defer' event to time_travel_add_irq_event().
 *
 * @vq: virtqueue whose ->priv points at its struct virtio_uml_vq_info
 */
static void vu_defer_irq_callback(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *vq_info = vq->priv;
	struct time_travel_event *ev = &vq_info->defer;

	time_travel_add_irq_event(ev);
}
#endif
static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
unsigned index, vq_callback_t *callback,
const char *name, bool ctx)
......@@ -866,6 +910,19 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
pdev->id, name);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/*
* When we get an interrupt, we must bounce it through the simulation
* calendar (the simtime device), except for the simtime device itself
* since that's part of the simulation control.
*/
if (time_travel_mode == TT_MODE_EXTERNAL && callback) {
info->callback = callback;
callback = vu_defer_irq_callback;
time_travel_set_event_fn(&info->defer, vu_defer_irq_handle);
}
#endif
vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
ctx, vu_notify, callback, info->name);
if (!vq) {
......@@ -874,6 +931,9 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
}
vq->priv = info;
num = virtqueue_get_vring_size(vq);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
info->vq = vq;
#endif
if (vu_dev->protocol_features &
BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
......
......@@ -15,6 +15,7 @@ enum time_travel_mode {
TT_MODE_OFF,
TT_MODE_BASIC,
TT_MODE_INFCPU,
TT_MODE_EXTERNAL,
};
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
......@@ -35,6 +36,24 @@ time_travel_set_event_fn(struct time_travel_event *e,
{
e->fn = fn;
}
void __time_travel_propagate_time(void);
/*
 * Call __time_travel_propagate_time(), but only while running in
 * TT_MODE_EXTERNAL; in every other time-travel mode this does nothing.
 */
static inline void time_travel_propagate_time(void)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	__time_travel_propagate_time();
}
void __time_travel_wait_readable(int fd);
/*
 * Call __time_travel_wait_readable() for @fd, but only while running in
 * TT_MODE_EXTERNAL; in every other time-travel mode this does nothing.
 *
 * @fd: file descriptor passed through unchanged
 */
static inline void time_travel_wait_readable(int fd)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	__time_travel_wait_readable(fd);
}
void time_travel_add_irq_event(struct time_travel_event *e);
#else
/* Memberless stand-in used when CONFIG_UML_TIME_TRAVEL_SUPPORT is not set. */
struct time_travel_event {
};
......@@ -47,5 +66,13 @@ static inline void time_travel_sleep(unsigned long long duration)
/* this is a macro so the event/function need not exist */
#define time_travel_set_event_fn(e, fn) do {} while (0)
/* Stub for builds without CONFIG_UML_TIME_TRAVEL_SUPPORT: does nothing. */
static inline void time_travel_propagate_time(void)
{
	/* intentionally empty */
}
/*
 * Stub for builds without CONFIG_UML_TIME_TRAVEL_SUPPORT: @fd is ignored
 * and nothing happens.
 */
static inline void time_travel_wait_readable(int fd)
{
	/* intentionally empty */
}
#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#endif /* __TIMER_INTERNAL_H__ */
......@@ -181,6 +181,7 @@ extern int os_falloc_punch(int fd, unsigned long long offset, int count);
extern int os_eventfd(unsigned int initval, int flags);
extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
const int *fds, unsigned int fds_num);
int os_poll(unsigned int n, const int *fds);
/* start_up.c */
extern void os_early_checks(void);
......
......@@ -24,7 +24,8 @@ void handle_syscall(struct uml_pt_regs *r)
* went to sleep, even if said userspace interacts with the kernel in
* various ways.
*/
if (time_travel_mode == TT_MODE_INFCPU)
if (time_travel_mode == TT_MODE_INFCPU ||
time_travel_mode == TT_MODE_EXTERNAL)
schedule();
/* Initialize the syscall number and default return value. */
......
This diff is collapsed.
......@@ -5,6 +5,7 @@
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
......@@ -17,6 +18,7 @@
#include <sys/un.h>
#include <sys/types.h>
#include <sys/eventfd.h>
#include <poll.h>
#include <os.h>
static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
......@@ -665,3 +667,31 @@ int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds,
return -errno;
return err;
}
/*
 * Block until one of up to two file descriptors reports an event.
 *
 * @n:   number of entries in @fds
 * @fds: descriptors to watch for POLLIN
 *
 * Returns the index into @fds of the first descriptor that has any
 * revents bit set, -EINVAL if @n exceeds the size of the local array,
 * -errno if poll() itself fails, or -EIO if poll() returned without
 * flagging any descriptor.
 */
int os_poll(unsigned int n, const int *fds)
{
	/* at most two FDs are needed today, so a fixed array avoids allocation */
	struct pollfd watch[2] = {};
	unsigned int idx;

	if (n > ARRAY_SIZE(watch))
		return -EINVAL;

	for (idx = 0; idx < n; idx++) {
		watch[idx].fd = fds[idx];
		watch[idx].events = POLLIN;
	}

	/* a timeout of -1 makes poll() wait without a time limit */
	if (poll(watch, n, -1) < 0)
		return -errno;

	/* report the lowest-indexed descriptor that has something pending */
	for (idx = 0; idx < n; idx++) {
		if (watch[idx].revents != 0)
			return idx;
	}

	return -EIO;
}
/*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Copyright (C) 2019 Intel Corporation
*/
#ifndef _UAPI_LINUX_UM_TIMETRAVEL_H
#define _UAPI_LINUX_UM_TIMETRAVEL_H
#include <linux/types.h>
/**
* struct um_timetravel_msg - UM time travel message
*
* This is the basic message type, going in both directions; requests and
* their ACK responses use this same structure.
*
* This is the message passed between the host (user-mode Linux instance)
* and the calendar (the application on the other side of the socket) in
* order to implement common scheduling.
*
* Whenever UML has an event it will request runtime for it from the
* calendar, and then wait for its turn until it can run, etc. Note
* that it will only ever request the single next runtime, i.e. multiple
* %UM_TIMETRAVEL_REQUEST messages override each other.
*/
struct um_timetravel_msg {
/**
* @op: operation value from &enum um_timetravel_ops
*/
__u32 op;
/**
* @seq: sequence number for the message - shall be reflected in
* the ACK response, and should be checked while processing
* the response to see if it matches
*/
__u32 seq;
/**
* @time: time in nanoseconds; whether the field carries a meaningful
* value depends on @op, see the individual operations in
* &enum um_timetravel_ops
*/
__u64 time;
};
/**
* enum um_timetravel_ops - Operation codes
*
* Values for the 'op' field of &struct um_timetravel_msg. Every code has
* an explicitly assigned numeric value. "host" below is the user-mode
* Linux instance and "calendar" is the application on the other side of
* the socket.
*/
enum um_timetravel_ops {
/**
* @UM_TIMETRAVEL_ACK: response (ACK) to any previous message,
* this usually doesn't carry any data in the 'time' field
* unless otherwise specified below
*/
UM_TIMETRAVEL_ACK = 0,
/**
* @UM_TIMETRAVEL_START: initialize the connection, the time
* field contains an (arbitrary) ID to possibly be able
* to distinguish the connections.
* NOTE(review): direction is not stated here; presumably
* host -> calendar - confirm.
*/
UM_TIMETRAVEL_START = 1,
/**
* @UM_TIMETRAVEL_REQUEST: request to run at the given time
* (host -> calendar)
*/
UM_TIMETRAVEL_REQUEST = 2,
/**
* @UM_TIMETRAVEL_WAIT: Indicate waiting for the previously requested
* runtime, new requests may be made while waiting (e.g. due to
* interrupts); the time field is ignored. The calendar must process
* this message and later send a %UM_TIMETRAVEL_RUN message when
* the host can run again.
* (host -> calendar)
*/
UM_TIMETRAVEL_WAIT = 3,
/**
* @UM_TIMETRAVEL_GET: return the current time from the calendar in the
* ACK message, the time in the request message is ignored
* (host -> calendar)
*/
UM_TIMETRAVEL_GET = 4,
/**
* @UM_TIMETRAVEL_UPDATE: time update to the calendar, must be sent e.g.
* before kicking an interrupt to another calendar
* (host -> calendar)
*/
UM_TIMETRAVEL_UPDATE = 5,
/**
* @UM_TIMETRAVEL_RUN: run time request granted, current time is in
* the time field
* (calendar -> host)
*/
UM_TIMETRAVEL_RUN = 6,
/**
* @UM_TIMETRAVEL_FREE_UNTIL: Enable free-running until the given time,
* this is a message from the calendar telling the host that it can
* freely do its own scheduling for anything before the indicated
* time.
* Note that if a calendar sends this message once, the host may
* assume that it will also do so in the future, if it implements
* wraparound semantics for the time field.
* (calendar -> host)
*/
UM_TIMETRAVEL_FREE_UNTIL = 7,
/**
* @UM_TIMETRAVEL_GET_TOD: Return time of day, typically used once at
* boot by the virtual machines to get a synchronized time from
* the simulation.
* NOTE(review): neither the direction nor the field carrying the
* result is stated here; presumably host -> calendar with the
* value returned in the ACK, like %UM_TIMETRAVEL_GET - confirm.
*/
UM_TIMETRAVEL_GET_TOD = 8,
};
#endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment