Commit 5932ca41 authored by Paolo Bonzini

KVM: x86: disallow pre-fault for SNP VMs before initialization

KVM_PRE_FAULT_MEMORY for an SNP guest can race with
sev_gmem_post_populate() in bad ways. For instance, the following
sequence can trigger an RMP fault:

  thread A, sev_gmem_post_populate: called
  thread B, sev_gmem_prepare: places below 'pfn' in a private state in RMP
  thread A, sev_gmem_post_populate: *vaddr = kmap_local_pfn(pfn + i);
  thread A, sev_gmem_post_populate: copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE);
  RMP #PF

Fix this by only allowing KVM_PRE_FAULT_MEMORY to run after a guest's
initial private memory contents have been finalized via
KVM_SEV_SNP_LAUNCH_FINISH.

Beyond fixing this race, enforcing the ordering also matches the
KVM_PRE_FAULT_MEMORY documentation, which states:

  "KVM maps memory as if the vCPU generated a stage-2 read page fault"

implying that the ioctl should act on the same guest state that a vCPU
would see post-launch, once the initial guest memory has been fully set
up.
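To illustrate the resulting contract, a minimal userspace sketch (the
fds, the earlier KVM_SEV_SNP_LAUNCH_START/UPDATE steps, and the 2 MiB
region at GPA 0 are assumptions for illustration, not part of this
patch):

  #include <errno.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int finalize_then_prefault(int vm_fd, int vcpu_fd, int sev_fd)
  {
          /* All-zero launch_finish: no ID block, no host data. */
          struct kvm_sev_snp_launch_finish finish = { 0 };
          struct kvm_sev_cmd cmd = {
                  .id = KVM_SEV_SNP_LAUNCH_FINISH,
                  .data = (__u64)(unsigned long)&finish,
                  .sev_fd = sev_fd,
          };
          struct kvm_pre_fault_memory range = {
                  .gpa = 0,                  /* hypothetical layout */
                  .size = 2UL * 1024 * 1024,
          };

          /*
           * Until launch finish succeeds, KVM_PRE_FAULT_MEMORY now
           * fails with EOPNOTSUPP for SNP guests.
           */
          if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd))
                  return -errno;

          /* Safe: no SNP_LAUNCH_UPDATE can race with the fault-in. */
          if (ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range))
                  return -errno;
          return 0;
  }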

Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent c2adcf05
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6402,6 +6402,12 @@ for the current vCPU state. KVM maps memory as if the vCPU generated a
 stage-2 read page fault, e.g. faults in memory as needed, but doesn't break
 CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed.
 
+In the case of confidential VM types where there is an initial set up of
+private guest memory before the guest is 'finalized'/measured, this ioctl
+should only be issued after completing all the necessary setup to put the
+guest into a 'finalized' state so that the above semantics can be reliably
+ensured.
+
 In some cases, multiple vCPUs might share the page tables. In this
 case, the ioctl can be called in parallel.
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1305,6 +1305,7 @@ struct kvm_arch {
 	u8 vm_type;
 	bool has_private_mem;
 	bool has_protected_state;
+	bool pre_fault_allowed;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	struct list_head active_mmu_pages;
 	struct list_head zapped_obsolete_pages;
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 	u64 end;
 	int r;
 
+	if (!vcpu->kvm->arch.pre_fault_allowed)
+		return -EOPNOTSUPP;
+
 	/*
 	 * reload is efficient when called repeatedly, so we can do it on
 	 * every iteration.
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2549,6 +2549,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	data->gctx_paddr = __psp_pa(sev->snp_context);
 	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
 
+	/*
+	 * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages
+	 * can be given to the guest simply by marking the RMP entry as private.
+	 * This can happen on first access and also with KVM_PRE_FAULT_MEMORY.
+	 */
+	if (!ret)
+		kvm->arch.pre_fault_allowed = true;
+
 	kfree(id_auth);
 
 e_free_id_block:
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm)
 		to_kvm_sev_info(kvm)->need_init = true;
 
 		kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM);
+		kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem;
 	}
 
 	if (!pause_filter_count || !pause_filter_thresh)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12646,6 +12646,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.vm_type = type;
 	kvm->arch.has_private_mem =
 		(type == KVM_X86_SW_PROTECTED_VM);
+	/* Decided by the vendor code for other VM types. */
+	kvm->arch.pre_fault_allowed =
+		type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM;
 
 	ret = kvm_page_track_init(kvm);
 	if (ret)
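A usage note on the new failure mode: userspace sees the guard in
kvm_arch_vcpu_pre_fault_memory() as EOPNOTSUPP, so a VMM can tell
"pre-fault must wait for launch finalization" apart from other errors.
A hedged sketch (the helper name is ours, not from the patch):

  #include <errno.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* 0: pre-fault done; 1: retry after LAUNCH_FINISH; -1: real error. */
  static int try_prefault(int vcpu_fd, struct kvm_pre_fault_memory *range)
  {
          if (!ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, range))
                  return 0;
          return errno == EOPNOTSUPP ? 1 : -1;
  }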