Commit d8ba8ba4 authored by David Woodhouse's avatar David Woodhouse Committed by Paolo Bonzini

KVM: x86/xen: Allow XEN_RUNSTATE_UPDATE flag behaviour to be configured

Closer inspection of the Xen code shows that we aren't supposed to be
using the XEN_RUNSTATE_UPDATE flag unconditionally. It should be
explicitly enabled by guests through the HYPERVISOR_vm_assist hypercall.
If we randomly set the top bit of ->state_entry_time for a guest that
hasn't asked for it and doesn't expect it, that could make the runtimes
fail to add up and confuse the guest. Without the flag it's perfectly
safe for a vCPU to read its own vcpu_runstate_info; just not for one
vCPU to read *another's*.

I briefly pondered adding a word for the whole set of VMASST_TYPE_*
flags but the only one we care about for HVM guests is this, so it
seemed a bit pointless.
Signed-off-by: default avatarDavid Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20221127122210.248427-3-dwmw2@infradead.org>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 5ec3289b
......@@ -5339,6 +5339,7 @@ KVM_PV_ASYNC_CLEANUP_PERFORM
union {
__u8 long_mode;
__u8 vector;
__u8 runstate_update_flag;
struct {
__u64 gfn;
} shared_info;
......@@ -5416,6 +5417,14 @@ KVM_XEN_ATTR_TYPE_XEN_VERSION
event channel delivery, so responding within the kernel without
exiting to userspace is beneficial.
KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the
XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read
other vCPUs' vcpu_runstate_info. Xen guests enable this feature via
the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
hypercall.
4.127 KVM_XEN_HVM_GET_ATTR
--------------------------
......@@ -8059,12 +8068,13 @@ to userspace.
This capability indicates the features that Xen supports for hosting Xen
PVHVM guests. Valid flags are::
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
......@@ -8096,6 +8106,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes.
related to event channel delivery, timers, and the XENVER_version
interception.
The KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG flag indicates that KVM supports
the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute in the KVM_XEN_SET_ATTR
and KVM_XEN_GET_ATTR ioctls. This controls whether KVM will set the
XEN_RUNSTATE_UPDATE flag in guest memory mapped vcpu_runstate_info during
updates of the runstate information. Note that versions of KVM which support
the RUNSTATE feature above, but not thie RUNSTATE_UPDATE_FLAG feature, will
always set the XEN_RUNSTATE_UPDATE flag when updating the guest structure,
which is perhaps counterintuitive. When this flag is advertised, KVM will
behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless
specifically enabled (by the guest making the hypercall, causing the VMM
to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute).
8.31 KVM_CAP_PPC_MULTITCE
-------------------------
......
......@@ -1113,6 +1113,7 @@ struct msr_bitmap_range {
struct kvm_xen {
u32 xen_version;
bool long_mode;
bool runstate_update_flag;
u8 upcall_vector;
struct gfn_to_pfn_cache shinfo_cache;
struct idr evtchn_ports;
......
......@@ -4431,7 +4431,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
if (sched_info_on())
r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
break;
#endif
case KVM_CAP_SYNC_REGS:
......
......@@ -179,7 +179,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
struct vcpu_runstate_info rs;
unsigned long flags;
size_t times_ofs;
uint8_t *update_bit;
uint8_t *update_bit = NULL;
uint64_t entry_time;
uint64_t *rs_times;
int *rs_state;
......@@ -297,7 +298,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
*/
rs_state = gpc1->khva;
rs_times = gpc1->khva + times_ofs;
update_bit = ((void *)(&rs_times[1])) - 1;
if (v->kvm->arch.xen.runstate_update_flag)
update_bit = ((void *)(&rs_times[1])) - 1;
} else {
/*
* The guest's runstate_info is split across two pages and we
......@@ -351,12 +353,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
* The update_bit is still directly in the guest memory,
* via one GPC or the other.
*/
if (user_len1 >= times_ofs + sizeof(uint64_t))
update_bit = gpc1->khva + times_ofs +
sizeof(uint64_t) - 1;
else
update_bit = gpc2->khva + times_ofs +
sizeof(uint64_t) - 1 - user_len1;
if (v->kvm->arch.xen.runstate_update_flag) {
if (user_len1 >= times_ofs + sizeof(uint64_t))
update_bit = gpc1->khva + times_ofs +
sizeof(uint64_t) - 1;
else
update_bit = gpc2->khva + times_ofs +
sizeof(uint64_t) - 1 - user_len1;
}
#ifdef CONFIG_X86_64
/*
......@@ -376,8 +380,12 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
* different cache line to the rest of the 64-bit word, due to
* the (lack of) alignment constraints.
*/
*update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
smp_wmb();
entry_time = vx->runstate_entry_time;
if (update_bit) {
entry_time |= XEN_RUNSTATE_UPDATE;
*update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
smp_wmb();
}
/*
* Now assemble the actual structure, either on our kernel stack
......@@ -385,7 +393,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
* rs_times pointers were set up above.
*/
*rs_state = vx->current_runstate;
rs_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
rs_times[0] = entry_time;
memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
/* For the split case, we have to then copy it to the guest. */
......@@ -396,8 +404,11 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
smp_wmb();
/* Finally, clear the XEN_RUNSTATE_UPDATE bit. */
*update_bit = vx->runstate_entry_time >> 56;
smp_wmb();
if (update_bit) {
entry_time &= ~XEN_RUNSTATE_UPDATE;
*update_bit = entry_time >> 56;
smp_wmb();
}
if (user_len2)
read_unlock(&gpc2->lock);
......@@ -619,6 +630,17 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
r = 0;
break;
case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
mutex_lock(&kvm->lock);
kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
mutex_unlock(&kvm->lock);
r = 0;
break;
default:
break;
}
......@@ -656,6 +678,15 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
r = 0;
break;
case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag;
r = 0;
break;
default:
break;
}
......
......@@ -1271,6 +1271,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
struct kvm_xen_hvm_config {
__u32 flags;
......@@ -1776,6 +1777,7 @@ struct kvm_xen_hvm_attr {
union {
__u8 long_mode;
__u8 vector;
__u8 runstate_update_flag;
struct {
__u64 gfn;
} shared_info;
......@@ -1816,6 +1818,8 @@ struct kvm_xen_hvm_attr {
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
/* Per-vCPU Xen attributes */
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
......
......@@ -440,6 +440,7 @@ int main(int argc, char *argv[])
TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
......@@ -475,6 +476,19 @@ int main(int argc, char *argv[])
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
if (do_runstate_flag) {
struct kvm_xen_hvm_attr ruf = {
.type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
.u.runstate_update_flag = 1,
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
ruf.u.runstate_update_flag = 0;
vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
TEST_ASSERT(ruf.u.runstate_update_flag == 1,
"Failed to read back RUNSTATE_UPDATE_FLAG attr");
}
struct kvm_xen_hvm_attr ha = {
.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment