Commit 1f058331 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge branch 'kvm-sev-move-context' into kvm-master

Add support for AMD SEV and SEV-ES intra-host migration support.  Intra
host migration provides a low-cost mechanism for userspace VMM upgrades.

In the common case for intra host migration, we can rely on the normal
ioctls for passing data from one VMM to the next. SEV, SEV-ES, and other
confidential compute environments make most of this information opaque, and
render KVM ioctls such as "KVM_GET_REGS" irrelevant.  As a result, we need
the ability to pass this opaque metadata from one VMM to the next. The
easiest way to do this is to leave this data in the kernel, and transfer
ownership of the metadata from one KVM VM (or vCPU) to the next.  In-kernel
hand off makes it possible to move any data that would be
unsafe/impossible for the kernel to hand directly to userspace, and
cannot be reproduced using data that can be handed to userspace.
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parents b9ecb9a9 6a581508
......@@ -6911,6 +6911,20 @@ MAP_SHARED mmap will result in an -EINVAL return.
When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
perform a bulk copy of tags to/from the guest.
7.29 KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM
-------------------------------------
Architectures: x86 SEV enabled
Type: vm
Parameters: args[0] is the fd of the source vm
Returns: 0 on success
This capability enables userspace to migrate the encryption context from the VM
indicated by the fd to the VM this is called on.
This is intended to support intra-host migration of VMs between userspace VMMs,
upgrading the VMM process without interrupting the guest.
8. Other capabilities.
======================
......
......@@ -1476,6 +1476,7 @@ struct kvm_x86_ops {
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
......
This diff is collapsed.
......@@ -1452,7 +1452,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm_switch_vmcb(svm, &svm->vmcb01);
if (vmsa_page)
svm->vmsa = page_address(vmsa_page);
svm->sev_es.vmsa = page_address(vmsa_page);
svm->guest_state_loaded = false;
......@@ -2835,11 +2835,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb))
if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
return kvm_complete_insn_gp(vcpu, err);
ghcb_set_sw_exit_info_1(svm->ghcb, 1);
ghcb_set_sw_exit_info_2(svm->ghcb,
ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
X86_TRAP_GP |
SVM_EVTINJ_TYPE_EXEPT |
SVM_EVTINJ_VALID);
......@@ -4701,6 +4701,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_unreg_region = svm_unregister_enc_region,
.vm_copy_enc_context_from = svm_vm_copy_asid_from,
.vm_move_enc_context_from = svm_vm_migrate_from,
.can_emulate_instruction = svm_can_emulate_instruction,
......
......@@ -80,6 +80,7 @@ struct kvm_sev_info {
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
struct kvm *enc_context_owner; /* Owner of copied encryption context */
struct misc_cg *misc_cg; /* For misc cgroup accounting */
atomic_t migration_in_progress;
};
struct kvm_svm {
......@@ -123,6 +124,20 @@ struct svm_nested_state {
bool initialized;
};
struct vcpu_sev_es_state {
/* SEV-ES support */
struct vmcb_save_area *vmsa;
struct ghcb *ghcb;
struct kvm_host_map ghcb_map;
bool received_first_sipi;
/* SEV-ES scratch area support */
void *ghcb_sa;
u32 ghcb_sa_len;
bool ghcb_sa_sync;
bool ghcb_sa_free;
};
struct vcpu_svm {
struct kvm_vcpu vcpu;
/* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
......@@ -186,17 +201,7 @@ struct vcpu_svm {
DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
} shadow_msr_intercept;
/* SEV-ES support */
struct vmcb_save_area *vmsa;
struct ghcb *ghcb;
struct kvm_host_map ghcb_map;
bool received_first_sipi;
/* SEV-ES scratch area support */
void *ghcb_sa;
u32 ghcb_sa_len;
bool ghcb_sa_sync;
bool ghcb_sa_free;
struct vcpu_sev_es_state sev_es;
bool guest_state_loaded;
};
......@@ -558,6 +563,7 @@ int svm_register_enc_region(struct kvm *kvm,
int svm_unregister_enc_region(struct kvm *kvm,
struct kvm_enc_region *range);
int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd);
void pre_sev_run(struct vcpu_svm *svm, int cpu);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
......
......@@ -5728,6 +5728,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
if (kvm_x86_ops.vm_copy_enc_context_from)
r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
return r;
case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
r = -EINVAL;
if (kvm_x86_ops.vm_move_enc_context_from)
r = kvm_x86_ops.vm_move_enc_context_from(
kvm, cap->args[0]);
return r;
case KVM_CAP_EXIT_HYPERCALL:
if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
r = -EINVAL;
......@@ -9552,7 +9558,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
r = -EIO;
goto out;
}
......
......@@ -150,7 +150,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
#define KVM_REQ_VM_BUGGED (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
......@@ -617,6 +617,7 @@ struct kvm {
unsigned int max_halt_poll_ns;
u32 dirty_ring_size;
bool vm_bugged;
bool vm_dead;
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
struct notifier_block pm_notifier;
......@@ -650,12 +651,19 @@ struct kvm {
#define vcpu_err(vcpu, fmt, ...) \
kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
static inline void kvm_vm_dead(struct kvm *kvm)
{
kvm->vm_dead = true;
kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD);
}
static inline void kvm_vm_bugged(struct kvm *kvm)
{
kvm->vm_bugged = true;
kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED);
kvm_vm_dead(kvm);
}
#define KVM_BUG(cond, kvm, fmt...) \
({ \
int __ret = (cond); \
......
......@@ -1130,6 +1130,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_BINARY_STATS_FD 203
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
#ifdef KVM_CAP_IRQ_ROUTING
......
......@@ -73,7 +73,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
......
......@@ -82,6 +82,7 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
......
......@@ -46,4 +46,6 @@ static inline bool cpu_has_svm(void)
return ecx & CPUID_SVM;
}
int open_sev_dev_path_or_exit(void);
#endif /* SELFTEST_KVM_SVM_UTILS_H */
......@@ -31,6 +31,19 @@ static void *align(void *x, size_t size)
return (void *) (((size_t) x + mask) & ~mask);
}
int open_path_or_exit(const char *path, int flags)
{
int fd;
fd = open(path, flags);
if (fd < 0) {
print_skip("%s not available (errno: %d)", path, errno);
exit(KSFT_SKIP);
}
return fd;
}
/*
* Open KVM_DEV_PATH if available, otherwise exit the entire program.
*
......@@ -42,16 +55,7 @@ static void *align(void *x, size_t size)
*/
static int _open_kvm_dev_path_or_exit(int flags)
{
int fd;
fd = open(KVM_DEV_PATH, flags);
if (fd < 0) {
print_skip("%s not available, is KVM loaded? (errno: %d)",
KVM_DEV_PATH, errno);
exit(KSFT_SKIP);
}
return fd;
return open_path_or_exit(KVM_DEV_PATH, flags);
}
int open_kvm_dev_path_or_exit(void)
......
......@@ -13,6 +13,8 @@
#include "processor.h"
#include "svm_util.h"
#define SEV_DEV_PATH "/dev/sev"
struct gpr64_regs guest_regs;
u64 rflags;
......@@ -172,3 +174,14 @@ void nested_svm_check_supported(void)
exit(KSFT_SKIP);
}
}
/*
* Open SEV_DEV_PATH if available, otherwise exit the entire program.
*
* Return:
* The opened file descriptor of /dev/sev.
*/
int open_sev_dev_path_or_exit(void)
{
return open_path_or_exit(SEV_DEV_PATH, 0);
}
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kvm.h>
#include <linux/psp-sev.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <stdlib.h>
#include <errno.h>
#include <pthread.h>
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "svm_util.h"
#include "kselftest.h"
#include "../lib/kvm_util_internal.h"
#define SEV_POLICY_ES 0b100
#define NR_MIGRATE_TEST_VCPUS 4
#define NR_MIGRATE_TEST_VMS 3
#define NR_LOCK_TESTING_THREADS 3
#define NR_LOCK_TESTING_ITERATIONS 10000
static void sev_ioctl(int vm_fd, int cmd_id, void *data)
{
struct kvm_sev_cmd cmd = {
.id = cmd_id,
.data = (uint64_t)data,
.sev_fd = open_sev_dev_path_or_exit(),
};
int ret;
ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
TEST_ASSERT((ret == 0 || cmd.error == SEV_RET_SUCCESS),
"%d failed: return code: %d, errno: %d, fw error: %d",
cmd_id, ret, errno, cmd.error);
}
static struct kvm_vm *sev_vm_create(bool es)
{
struct kvm_vm *vm;
struct kvm_sev_launch_start start = { 0 };
int i;
vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL);
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
vm_vcpu_add(vm, i);
if (es)
start.policy |= SEV_POLICY_ES;
sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start);
if (es)
sev_ioctl(vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
return vm;
}
static struct kvm_vm *__vm_create(void)
{
struct kvm_vm *vm;
int i;
vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
vm_vcpu_add(vm, i);
return vm;
}
static int __sev_migrate_from(int dst_fd, int src_fd)
{
struct kvm_enable_cap cap = {
.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
.args = { src_fd }
};
return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
}
static void sev_migrate_from(int dst_fd, int src_fd)
{
int ret;
ret = __sev_migrate_from(dst_fd, src_fd);
TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
}
static void test_sev_migrate_from(bool es)
{
struct kvm_vm *src_vm;
struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
int i;
src_vm = sev_vm_create(es);
for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
dst_vms[i] = __vm_create();
/* Initial migration from the src to the first dst. */
sev_migrate_from(dst_vms[0]->fd, src_vm->fd);
for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd);
/* Migrate the guest back to the original VM. */
sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
kvm_vm_free(src_vm);
for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
kvm_vm_free(dst_vms[i]);
}
struct locking_thread_input {
struct kvm_vm *vm;
int source_fds[NR_LOCK_TESTING_THREADS];
};
static void *locking_test_thread(void *arg)
{
int i, j;
struct locking_thread_input *input = (struct locking_thread_input *)arg;
for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
j = i % NR_LOCK_TESTING_THREADS;
__sev_migrate_from(input->vm->fd, input->source_fds[j]);
}
return NULL;
}
static void test_sev_migrate_locking(void)
{
struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
pthread_t pt[NR_LOCK_TESTING_THREADS];
int i;
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
input[i].vm = sev_vm_create(/* es= */ false);
input[0].source_fds[i] = input[i].vm->fd;
}
for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
memcpy(input[i].source_fds, input[0].source_fds,
sizeof(input[i].source_fds));
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
pthread_join(pt[i], NULL);
}
static void test_sev_migrate_parameters(void)
{
struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
*sev_es_vm_no_vmsa;
int ret;
sev_vm = sev_vm_create(/* es= */ false);
sev_es_vm = sev_vm_create(/* es= */ true);
vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
vm_no_sev = __vm_create();
sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
vm_vcpu_add(sev_es_vm_no_vmsa, 1);
ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n",
ret, errno);
ret = __sev_migrate_from(sev_es_vm->fd, sev_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n",
ret, errno);
ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n",
ret, errno);
ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm_no_vmsa->fd);
TEST_ASSERT(
ret == -1 && errno == EINVAL,
"SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
ret, errno);
ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd);
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Migrations require SEV enabled. ret %d, errno: %d\n", ret,
errno);
}
int main(int argc, char *argv[])
{
test_sev_migrate_from(/* es= */ false);
test_sev_migrate_from(/* es= */ true);
test_sev_migrate_locking();
test_sev_migrate_parameters();
return 0;
}
......@@ -3747,7 +3747,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
struct kvm_fpu *fpu = NULL;
struct kvm_sregs *kvm_sregs = NULL;
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
return -EIO;
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
......@@ -3957,7 +3957,7 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
void __user *argp = compat_ptr(arg);
int r;
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
return -EIO;
switch (ioctl) {
......@@ -4023,7 +4023,7 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
{
struct kvm_device *dev = filp->private_data;
if (dev->kvm->mm != current->mm || dev->kvm->vm_bugged)
if (dev->kvm->mm != current->mm || dev->kvm->vm_dead)
return -EIO;
switch (ioctl) {
......@@ -4345,7 +4345,7 @@ static long kvm_vm_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int r;
if (kvm->mm != current->mm || kvm->vm_bugged)
if (kvm->mm != current->mm || kvm->vm_dead)
return -EIO;
switch (ioctl) {
case KVM_CREATE_VCPU:
......@@ -4556,7 +4556,7 @@ static long kvm_vm_compat_ioctl(struct file *filp,
struct kvm *kvm = filp->private_data;
int r;
if (kvm->mm != current->mm || kvm->vm_bugged)
if (kvm->mm != current->mm || kvm->vm_dead)
return -EIO;
switch (ioctl) {
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment