Commit b9ecb9a9 authored by Paolo Bonzini

Merge branch 'kvm-guest-sev-migration' into kvm-master

Add guest api and guest kernel support for SEV live migration.

Introduces a new hypercall to notify the host of changes to the page
encryption status.  If the page is encrypted then it must be migrated
through the SEV firmware or a helper VM sharing the key.  If the page is
not encrypted then it can be migrated normally by userspace.  This new
hypercall is invoked using paravirt_ops.

Conflicts: sev_active() replaced by cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT).
parents debe436e 73f1b4fe
...@@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, ...@@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
return ret; return ret;
} }
/*
 * Issue a three-argument hypercall with the VMMCALL instruction.
 *
 * @nr is placed in the "a" register and @p1, @p2, @p3 in the "b", "c" and
 * "d" registers.  Whatever the "a" register holds once the instruction
 * completes is returned to the caller.  The "memory" clobber keeps the
 * compiler from caching memory contents across the call.
 */
static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
				      unsigned long p2, unsigned long p3)
{
	long result;

	asm volatile("vmmcall"
		     : "=a" (result)
		     : "a" (nr), "b" (p1), "c" (p2), "d" (p3)
		     : "memory");

	return result;
}
#ifdef CONFIG_KVM_GUEST #ifdef CONFIG_KVM_GUEST
void kvmclock_init(void); void kvmclock_init(void);
void kvmclock_disable(void); void kvmclock_disable(void);
......
...@@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp); ...@@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
/*
 * Init-time helper taking a virtual address, a page count and the new
 * encryption state (@enc) of that range.
 */
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
bool enc);
void __init mem_encrypt_free_decrypted_mem(void); void __init mem_encrypt_free_decrypted_mem(void);
...@@ -78,6 +80,8 @@ static inline int __init ...@@ -78,6 +80,8 @@ static inline int __init
early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; } early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
static inline int __init static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
/* No-op variant of early_set_mem_enc_dec_hypercall(): the body is empty. */
static inline void __init
early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
static inline void mem_encrypt_free_decrypted_mem(void) { } static inline void mem_encrypt_free_decrypted_mem(void) { }
......
...@@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) ...@@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
PVOP_VCALL1(mmu.exit_mmap, mm); PVOP_VCALL1(mmu.exit_mmap, mm);
} }
/*
 * Invoke the mmu.notify_page_enc_status_changed paravirt hook, passing
 * @pfn, @npages and @enc through unchanged.  The hook returns nothing.
 */
static inline void notify_page_enc_status_changed(unsigned long pfn,
int npages, bool enc)
{
PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
}
#ifdef CONFIG_PARAVIRT_XXL #ifdef CONFIG_PARAVIRT_XXL
static inline void load_sp0(unsigned long sp0) static inline void load_sp0(unsigned long sp0)
{ {
......
...@@ -168,6 +168,7 @@ struct pv_mmu_ops { ...@@ -168,6 +168,7 @@ struct pv_mmu_ops {
/* Hook for intercepting the destruction of an mm_struct. */ /* Hook for intercepting the destruction of an mm_struct. */
void (*exit_mmap)(struct mm_struct *mm); void (*exit_mmap)(struct mm_struct *mm);
void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
#ifdef CONFIG_PARAVIRT_XXL #ifdef CONFIG_PARAVIRT_XXL
struct paravirt_callee_save read_cr2; struct paravirt_callee_save read_cr2;
......
...@@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages); ...@@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page); bool kernel_page_present(struct page *page);
void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
extern int kernel_set_to_readonly; extern int kernel_set_to_readonly;
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/swait.h> #include <linux/swait.h>
#include <linux/syscore_ops.h> #include <linux/syscore_ops.h>
#include <linux/cc_platform.h> #include <linux/cc_platform.h>
#include <linux/efi.h>
#include <asm/timer.h> #include <asm/timer.h>
#include <asm/cpu.h> #include <asm/cpu.h>
#include <asm/traps.h> #include <asm/traps.h>
...@@ -41,6 +42,7 @@ ...@@ -41,6 +42,7 @@
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/reboot.h> #include <asm/reboot.h>
#include <asm/svm.h> #include <asm/svm.h>
#include <asm/e820/api.h>
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
...@@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown) ...@@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown)
kvm_disable_steal_time(); kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0); wrmsrl(MSR_KVM_PV_EOI_EN, 0);
if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
kvm_pv_disable_apf(); kvm_pv_disable_apf();
if (!shutdown) if (!shutdown)
apf_task_wake_all(); apf_task_wake_all();
...@@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) ...@@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
__send_ipi_mask(local_mask, vector); __send_ipi_mask(local_mask, vector);
} }
/*
 * Late initcall: on a memory-encrypted guest whose host advertises
 * KVM_FEATURE_MIGRATION_CONTROL and which was booted via EFI, read the
 * "SevLiveMigrationEnabled" EFI variable and, if it is set, write
 * KVM_MIGRATION_READY to MSR_KVM_MIGRATION_CONTROL.
 *
 * Returns 1 when the MSR was written, 0 in every other case.
 */
static int __init setup_efi_kvm_sev_migration(void)
{
	efi_char16_t var_name[] = L"SevLiveMigrationEnabled";
	efi_guid_t var_guid = AMD_SEV_MEM_ENCRYPT_GUID;
	unsigned long var_size;
	efi_status_t rc;
	bool migration_on;

	/* Silently bail out unless all three preconditions hold. */
	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
	    !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL) ||
	    !efi_enabled(EFI_BOOT))
		return 0;

	if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
		pr_info("%s : EFI runtime services are not enabled\n", __func__);
		return 0;
	}

	/* Fetch the variable's contents straight into the flag. */
	var_size = sizeof(migration_on);
	rc = efi.get_variable(var_name, &var_guid, NULL, &var_size,
			      &migration_on);

	switch (rc) {
	case EFI_SUCCESS:
		break;
	case EFI_NOT_FOUND:
		pr_info("%s : EFI live migration variable not found\n", __func__);
		return 0;
	default:
		pr_info("%s : EFI variable retrieval failed\n", __func__);
		return 0;
	}

	if (!migration_on) {
		pr_info("%s: live migration disabled in EFI\n", __func__);
		return 0;
	}

	pr_info("%s : live migration enabled in EFI\n", __func__);
	wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);

	return 1;
}
late_initcall(setup_efi_kvm_sev_migration);
/* /*
* Set the IPI entry points * Set the IPI entry points
*/ */
...@@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void) ...@@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void)
return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID); return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
} }
/*
 * Report an encryption-status change for @npages pages starting at page
 * frame @pfn by issuing the KVM_HC_MAP_GPA_RANGE hypercall.  The start of
 * the range is passed as an address (pfn << PAGE_SHIFT) and the attribute
 * word combines the encryption state derived from @enc with the 4K page
 * size flag.  The hypercall's return value is not examined.
 */
static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
{
	unsigned long start_addr = pfn << PAGE_SHIFT;
	unsigned long attrs = KVM_MAP_GPA_RANGE_ENC_STAT(enc) |
			      KVM_MAP_GPA_RANGE_PAGE_SZ_4K;

	kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, start_addr, npages, attrs);
}
/*
 * Platform init hook.  When the guest has encrypted memory and the host
 * advertises KVM_FEATURE_MIGRATION_CONTROL, install the page encryption
 * status notifier, report every E820 RAM range as encrypted, report the
 * __bss_decrypted section as decrypted and, for non-EFI boots, write
 * KVM_MIGRATION_READY to MSR_KVM_MIGRATION_CONTROL.  kvmclock_init() and
 * the apic_post_init hook assignment run unconditionally afterwards.
 */
static void __init kvm_init_platform(void) static void __init kvm_init_platform(void)
{ {
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
unsigned long nr_pages;
int i;
/* Route page encryption status notifications to the KVM hypercall. */
pv_ops.mmu.notify_page_enc_status_changed =
kvm_sev_hc_page_enc_status;
/*
* Reset the host's shared pages list related to kernel
* specific page encryption status settings before we load a
* new kernel by kexec. Reset the page encryption status
* during early boot instead of just before kexec to avoid SMP
* races during kvm_pv_guest_cpu_reboot().
* NOTE: We cannot reset the complete shared pages list
* here as we need to retain the UEFI/OVMF firmware
* specific settings.
*/
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
/* Only RAM entries are reported; every other type is skipped. */
if (entry->type != E820_TYPE_RAM)
continue;
/* Page count is the entry size rounded up to whole pages. */
nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);
kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr,
nr_pages,
KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
}
/*
* Ensure that _bss_decrypted section is marked as decrypted in the
* shared pages list.
*/
nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
PAGE_SIZE);
early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
nr_pages, 0);
/*
* If not booted using EFI, enable Live migration support.
*/
if (!efi_enabled(EFI_BOOT))
wrmsrl(MSR_KVM_MIGRATION_CONTROL,
KVM_MIGRATION_READY);
}
kvmclock_init(); kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init; x86_platform.apic_post_init = kvm_apic_init;
} }
......
...@@ -337,6 +337,7 @@ struct paravirt_patch_template pv_ops = { ...@@ -337,6 +337,7 @@ struct paravirt_patch_template pv_ops = {
(void (*)(struct mmu_gather *, void *))tlb_remove_page, (void (*)(struct mmu_gather *, void *))tlb_remove_page,
.mmu.exit_mmap = paravirt_nop, .mmu.exit_mmap = paravirt_nop,
.mmu.notify_page_enc_status_changed = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL #ifdef CONFIG_PARAVIRT_XXL
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2), .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
......
...@@ -229,29 +229,76 @@ void __init sev_setup_arch(void) ...@@ -229,29 +229,76 @@ void __init sev_setup_arch(void)
swiotlb_adjust_size(size); swiotlb_adjust_size(size);
} }
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{ {
pgprot_t old_prot, new_prot; unsigned long pfn = 0;
unsigned long pfn, pa, size; pgprot_t prot;
pte_t new_pte;
switch (level) { switch (level) {
case PG_LEVEL_4K: case PG_LEVEL_4K:
pfn = pte_pfn(*kpte); pfn = pte_pfn(*kpte);
old_prot = pte_pgprot(*kpte); prot = pte_pgprot(*kpte);
break; break;
case PG_LEVEL_2M: case PG_LEVEL_2M:
pfn = pmd_pfn(*(pmd_t *)kpte); pfn = pmd_pfn(*(pmd_t *)kpte);
old_prot = pmd_pgprot(*(pmd_t *)kpte); prot = pmd_pgprot(*(pmd_t *)kpte);
break; break;
case PG_LEVEL_1G: case PG_LEVEL_1G:
pfn = pud_pfn(*(pud_t *)kpte); pfn = pud_pfn(*(pud_t *)kpte);
old_prot = pud_pgprot(*(pud_t *)kpte); prot = pud_pgprot(*(pud_t *)kpte);
break; break;
default: default:
WARN_ONCE(1, "Invalid level for kpte\n");
return 0;
}
if (ret_prot)
*ret_prot = prot;
return pfn;
}
/*
 * Notify the hypervisor, one kernel mapping at a time, that the virtual
 * range [@vaddr, @vaddr + @npages pages) is now mapped encrypted (@enc
 * true) or decrypted (@enc false).
 *
 * Each iteration looks up the mapping that covers the current address,
 * reports the pages of that mapping through
 * notify_page_enc_status_changed() and then advances to the first address
 * past the mapping.  The walk stops early, with a one-time warning, if an
 * address in the range has no mapping.
 *
 * Compiles to an empty function when CONFIG_PARAVIRT is not set.
 */
void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
{
#ifdef CONFIG_PARAVIRT
	/*
	 * Widen before shifting: shifting the int page count directly
	 * overflows for large ranges and yields a bogus end address.
	 */
	unsigned long sz = (unsigned long)npages << PAGE_SHIFT;
	unsigned long vaddr_end = vaddr + sz;

	while (vaddr < vaddr_end) {
		unsigned long psize, pmask, pfn;
		int level;
		pte_t *kpte;

		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			WARN_ONCE(1, "kpte lookup for vaddr\n");
			return;
		}

		/* Kept as unsigned long so large mappings are not truncated. */
		psize = page_level_size(level);
		pmask = page_level_mask(level);

		/*
		 * A zero pfn means there is nothing to report for this
		 * mapping (pg_level_to_pfn() returns 0 for levels it does
		 * not handle).  Skip the notification but still step past
		 * the mapping below; retrying the same address would never
		 * terminate.
		 */
		pfn = pg_level_to_pfn(level, kpte, NULL);
		if (pfn)
			notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT,
						       enc);

		vaddr = (vaddr & pmask) + psize;
	}
#endif
}
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
pgprot_t old_prot, new_prot;
unsigned long pfn, pa, size;
pte_t new_pte;
pfn = pg_level_to_pfn(level, kpte, &old_prot);
if (!pfn)
return;
new_prot = old_prot; new_prot = old_prot;
if (enc) if (enc)
pgprot_val(new_prot) |= _PAGE_ENC; pgprot_val(new_prot) |= _PAGE_ENC;
...@@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) ...@@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
static int __init early_set_memory_enc_dec(unsigned long vaddr, static int __init early_set_memory_enc_dec(unsigned long vaddr,
unsigned long size, bool enc) unsigned long size, bool enc)
{ {
unsigned long vaddr_end, vaddr_next; unsigned long vaddr_end, vaddr_next, start;
unsigned long psize, pmask; unsigned long psize, pmask;
int split_page_size_mask; int split_page_size_mask;
int level, ret; int level, ret;
pte_t *kpte; pte_t *kpte;
start = vaddr;
vaddr_next = vaddr; vaddr_next = vaddr;
vaddr_end = vaddr + size; vaddr_end = vaddr + size;
...@@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ...@@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
ret = 0; ret = 0;
notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
out: out:
__flush_tlb_all(); __flush_tlb_all();
return ret; return ret;
...@@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) ...@@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
return early_set_memory_enc_dec(vaddr, size, true); return early_set_memory_enc_dec(vaddr, size, true);
} }
/*
 * Init-time entry point that forwards @vaddr, @npages and @enc unchanged
 * to notify_range_enc_status_changed().
 */
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
{
notify_range_enc_status_changed(vaddr, npages, enc);
}
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev) bool force_dma_unencrypted(struct device *dev)
{ {
......
...@@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) ...@@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
*/ */
cpa_flush(&cpa, 0); cpa_flush(&cpa, 0);
/*
* Notify hypervisor that a given memory range is mapped encrypted
* or decrypted.
*/
notify_range_enc_status_changed(addr, numpages, enc);
return ret; return ret;
} }
......
...@@ -362,6 +362,7 @@ void efi_native_runtime_setup(void); ...@@ -362,6 +362,7 @@ void efi_native_runtime_setup(void);
/* OEM GUIDs */ /* OEM GUIDs */
#define DELLEMC_EFI_RCI2_TABLE_GUID EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55) #define DELLEMC_EFI_RCI2_TABLE_GUID EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
#define AMD_SEV_MEM_ENCRYPT_GUID EFI_GUID(0x0cf29b71, 0x9e51, 0x433a, 0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
typedef struct { typedef struct {
efi_guid_t guid; efi_guid_t guid;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment