Commit 1773014a authored by Paolo Bonzini

Merge branch 'kvm-fixes' into HEAD

* fix latent bug in how usage of large pages is determined for
  confidential VMs

* fix "underline too short" in docs

* eliminate log spam from limited APIC timer periods

* disallow pre-faulting of memory before SEV-SNP VMs are initialized

* delay clearing and encrypting private memory until it is added to
  guest page tables

* this change also enables another small cleanup: the checks in
  SNP_LAUNCH_UPDATE that limit it to non-populated, private pages
  can now be moved into the common kvm_gmem_populate() function
parents 29b5bbf7 aca0ec97
......@@ -6368,7 +6368,7 @@ a single guest_memfd file, but the bound ranges must not overlap).
See KVM_SET_USER_MEMORY_REGION2 for additional details.
4.143 KVM_PRE_FAULT_MEMORY
------------------------
---------------------------
:Capability: KVM_CAP_PRE_FAULT_MEMORY
:Architectures: none
......@@ -6405,6 +6405,12 @@ for the current vCPU state. KVM maps memory as if the vCPU generated a
stage-2 read page fault, e.g. faults in memory as needed, but doesn't break
CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed.
In the case of confidential VM types where there is an initial set up of
private guest memory before the guest is 'finalized'/measured, this ioctl
should only be issued after completing all the necessary setup to put the
guest into a 'finalized' state so that the above semantics can be reliably
ensured.
In some cases, multiple vCPUs might share the page tables. In this
case, the ioctl can be called in parallel.
......
......@@ -1305,6 +1305,7 @@ struct kvm_arch {
u8 vm_type;
bool has_private_mem;
bool has_protected_state;
bool pre_fault_allowed;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
......
......@@ -141,8 +141,8 @@ config KVM_AMD_SEV
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
select ARCH_HAS_CC_PLATFORM
select KVM_GENERIC_PRIVATE_MEM
select HAVE_KVM_GMEM_PREPARE
select HAVE_KVM_GMEM_INVALIDATE
select HAVE_KVM_ARCH_GMEM_PREPARE
select HAVE_KVM_ARCH_GMEM_INVALIDATE
help
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.
......
......@@ -1743,7 +1743,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
s64 min_period = min_timer_period_us * 1000LL;
if (apic->lapic_timer.period < min_period) {
pr_info_ratelimited(
pr_info_once(
"vcpu %i: requested %lld ns "
"lapic timer period limited to %lld ns\n",
apic->vcpu->vcpu_id,
......
......@@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
if (req_max_level)
max_level = min(max_level, req_max_level);
return req_max_level;
return max_level;
}
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
......@@ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
u64 end;
int r;
if (!vcpu->kvm->arch.pre_fault_allowed)
return -EOPNOTSUPP;
/*
* reload is efficient when called repeatedly, so we can do it on
* every iteration.
......@@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
const unsigned long end = start + KVM_PAGES_PER_HPAGE(level);
if (level == PG_LEVEL_2M)
return kvm_range_has_memory_attributes(kvm, start, end, attrs);
return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs);
for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) {
if (hugepage_test_mixed(slot, gfn, level - 1) ||
......
......@@ -2279,18 +2279,11 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
bool assigned;
int level;
if (!kvm_mem_is_private(kvm, gfn)) {
pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n",
__func__, gfn);
ret = -EINVAL;
goto err;
}
ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level);
if (ret || assigned) {
pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n",
__func__, gfn, ret, assigned);
ret = -EINVAL;
ret = ret ? -EINVAL : -EEXIST;
goto err;
}
......@@ -2549,6 +2542,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
data->gctx_paddr = __psp_pa(sev->snp_context);
ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
/*
* Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages
* can be given to the guest simply by marking the RMP entry as private.
* This can happen on first access and also with KVM_PRE_FAULT_MEMORY.
*/
if (!ret)
kvm->arch.pre_fault_allowed = true;
kfree(id_auth);
e_free_id_block:
......
......@@ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm)
to_kvm_sev_info(kvm)->need_init = true;
kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM);
kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem;
}
if (!pause_filter_count || !pause_filter_thresh)
......
......@@ -12646,6 +12646,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.vm_type = type;
kvm->arch.has_private_mem =
(type == KVM_X86_SW_PROTECTED_VM);
/* Decided by the vendor code for other VM types. */
kvm->arch.pre_fault_allowed =
type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM;
ret = kvm_page_track_init(kvm);
if (ret)
......@@ -13641,19 +13644,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
bool kvm_arch_gmem_prepare_needed(struct kvm *kvm)
{
return kvm->arch.vm_type == KVM_X86_SNP_VM;
}
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
{
	int ret;

	/*
	 * Hand the request to the vendor gmem_prepare hook and report its
	 * result unchanged.  The arguments are forwarded with @pfn ahead of
	 * @gfn, i.e. not in the order this wrapper receives them.
	 */
	ret = kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order);

	return ret;
}
#endif
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
{
kvm_x86_call(gmem_invalidate)(start, end);
......
......@@ -2414,7 +2414,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn
}
bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
unsigned long attrs);
unsigned long mask, unsigned long attrs);
bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
struct kvm_gfn_range *range);
bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
......@@ -2445,11 +2445,11 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
}
#endif /* CONFIG_KVM_PRIVATE_MEM */
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
#endif
#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
/**
* kvm_gmem_populate() - Populate/prepare a GPA range with guest data
*
......@@ -2476,8 +2476,9 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
kvm_gmem_populate_cb post_populate, void *opaque);
#endif
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
#endif
......
......@@ -113,10 +113,10 @@ config KVM_GENERIC_PRIVATE_MEM
select KVM_PRIVATE_MEM
bool
config HAVE_KVM_GMEM_PREPARE
config HAVE_KVM_ARCH_GMEM_PREPARE
bool
depends on KVM_PRIVATE_MEM
config HAVE_KVM_GMEM_INVALIDATE
config HAVE_KVM_ARCH_GMEM_INVALIDATE
bool
depends on KVM_PRIVATE_MEM
......@@ -13,84 +13,93 @@ struct kvm_gmem {
struct list_head entry;
};
static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio)
/**
* folio_file_pfn - like folio_file_page, but return a pfn.
* @folio: The folio which contains this index.
* @index: The index we want to look up.
*
* Return: The pfn for this index.
*/
static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index)
{
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
struct list_head *gmem_list = &inode->i_mapping->i_private_list;
struct kvm_gmem *gmem;
list_for_each_entry(gmem, gmem_list, entry) {
struct kvm_memory_slot *slot;
struct kvm *kvm = gmem->kvm;
struct page *page;
kvm_pfn_t pfn;
gfn_t gfn;
int rc;
if (!kvm_arch_gmem_prepare_needed(kvm))
continue;
slot = xa_load(&gmem->bindings, index);
if (!slot)
continue;
return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1));
}
page = folio_file_page(folio, index);
pfn = page_to_pfn(page);
gfn = slot->base_gfn + index - slot->gmem.pgoff;
rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page)));
static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
pgoff_t index, struct folio *folio)
{
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
kvm_pfn_t pfn = folio_file_pfn(folio, index);
gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff;
int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio));
if (rc) {
pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
index, gfn, pfn, rc);
return rc;
}
}
#endif
return 0;
}
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
static inline void kvm_gmem_mark_prepared(struct folio *folio)
{
struct folio *folio;
/* TODO: Support huge pages. */
folio = filemap_grab_folio(inode->i_mapping, index);
if (IS_ERR(folio))
return folio;
folio_mark_uptodate(folio);
}
/*
* Use the up-to-date flag to track whether or not the memory has been
* zeroed before being handed off to the guest. There is no backing
* storage for the memory, so the folio will remain up-to-date until
* it's removed.
*
* TODO: Skip clearing pages when trusted firmware will do it when
* assigning memory to the guest.
/*
* Process @folio, which contains @gfn, so that the guest can use it.
* The folio must be locked and the gfn must be contained in @slot.
* On successful return the guest sees a zero page so as to avoid
* leaking host data and the up-to-date flag is set.
*/
if (!folio_test_uptodate(folio)) {
unsigned long nr_pages = folio_nr_pages(folio);
unsigned long i;
static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, struct folio *folio)
{
unsigned long nr_pages, i;
pgoff_t index;
int r;
nr_pages = folio_nr_pages(folio);
for (i = 0; i < nr_pages; i++)
clear_highpage(folio_page(folio, i));
folio_mark_uptodate(folio);
}
/*
* Preparing huge folios should always be safe, since it should
* be possible to split them later if needed.
*
* Right now the folio order is always going to be zero, but the
* code is ready for huge folios. The only assumption is that
* the base pgoff of memslots is naturally aligned with the
* requested page order, ensuring that huge folios can also use
* huge page table entries for GPA->HPA mapping.
*
* The order will be passed when creating the guest_memfd, and
* checked when creating memslots.
*/
WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio)));
index = gfn - slot->base_gfn + slot->gmem.pgoff;
index = ALIGN_DOWN(index, 1 << folio_order(folio));
r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
if (!r)
kvm_gmem_mark_prepared(folio);
if (prepare) {
int r = kvm_gmem_prepare_folio(inode, index, folio);
if (r < 0) {
folio_unlock(folio);
folio_put(folio);
return ERR_PTR(r);
}
}
return r;
}
/*
/*
* Returns a locked folio on success. The caller is responsible for
* setting the up-to-date flag before the memory is mapped into the guest.
* There is no backing storage for the memory, so the folio will remain
* up-to-date until it's removed.
*
* Ignore accessed, referenced, and dirty flags. The memory is
* unevictable and there is no storage to write back to.
*/
return folio;
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
{
	struct folio *folio;

	/*
	 * Fetch the folio for @index from the inode's mapping.  The result
	 * is passed back untouched, so callers must test it with IS_ERR()
	 * before use.
	 *
	 * TODO: Support huge pages.
	 */
	folio = filemap_grab_folio(inode->i_mapping, index);

	return folio;
}
static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
......@@ -190,7 +199,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
break;
}
folio = kvm_gmem_get_folio(inode, index, true);
folio = kvm_gmem_get_folio(inode, index);
if (IS_ERR(folio)) {
r = PTR_ERR(folio);
break;
......@@ -343,7 +352,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
return MF_DELAYED;
}
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
static void kvm_gmem_free_folio(struct folio *folio)
{
struct page *page = folio_page(folio, 0);
......@@ -358,7 +367,7 @@ static const struct address_space_operations kvm_gmem_aops = {
.dirty_folio = noop_dirty_folio,
.migrate_folio = kvm_gmem_migrate_folio,
.error_remove_folio = kvm_gmem_error_folio,
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
.free_folio = kvm_gmem_free_folio,
#endif
};
......@@ -541,64 +550,76 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
fput(file);
}
static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
/* Returns a locked folio on success. */
static struct folio *
__kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared,
int *max_order)
{
pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
struct kvm_gmem *gmem = file->private_data;
struct folio *folio;
struct page *page;
int r;
if (file != slot->gmem.file) {
WARN_ON_ONCE(slot->gmem.file);
return -EFAULT;
return ERR_PTR(-EFAULT);
}
gmem = file->private_data;
if (xa_load(&gmem->bindings, index) != slot) {
WARN_ON_ONCE(xa_load(&gmem->bindings, index));
return -EIO;
return ERR_PTR(-EIO);
}
folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
folio = kvm_gmem_get_folio(file_inode(file), index);
if (IS_ERR(folio))
return PTR_ERR(folio);
return folio;
if (folio_test_hwpoison(folio)) {
folio_unlock(folio);
folio_put(folio);
return -EHWPOISON;
return ERR_PTR(-EHWPOISON);
}
page = folio_file_page(folio, index);
*pfn = page_to_pfn(page);
*pfn = folio_file_pfn(folio, index);
if (max_order)
*max_order = 0;
r = 0;
folio_unlock(folio);
return r;
*is_prepared = folio_test_uptodate(folio);
return folio;
}
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
{
struct file *file = kvm_gmem_get_file(slot);
int r;
struct folio *folio;
bool is_prepared = false;
int r = 0;
if (!file)
return -EFAULT;
r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true);
folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order);
if (IS_ERR(folio)) {
r = PTR_ERR(folio);
goto out;
}
if (!is_prepared)
r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
folio_unlock(folio);
if (r < 0)
folio_put(folio);
out:
fput(file);
return r;
}
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
kvm_gmem_populate_cb post_populate, void *opaque)
{
......@@ -625,7 +646,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
for (i = 0; i < npages; i += (1 << max_order)) {
struct folio *folio;
gfn_t gfn = start_gfn + i;
bool is_prepared = false;
kvm_pfn_t pfn;
if (signal_pending(current)) {
......@@ -633,18 +656,39 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
break;
}
ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false);
if (ret)
folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order);
if (IS_ERR(folio)) {
ret = PTR_ERR(folio);
break;
}
if (!IS_ALIGNED(gfn, (1 << max_order)) ||
(npages - i) < (1 << max_order))
max_order = 0;
if (is_prepared) {
folio_unlock(folio);
folio_put(folio);
ret = -EEXIST;
break;
}
folio_unlock(folio);
WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) ||
(npages - i) < (1 << max_order));
ret = -EINVAL;
while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order),
KVM_MEMORY_ATTRIBUTE_PRIVATE,
KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
if (!max_order)
goto put_folio_and_exit;
max_order--;
}
p = src ? src + i * PAGE_SIZE : NULL;
ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
if (!ret)
kvm_gmem_mark_prepared(folio);
put_page(pfn_to_page(pfn));
put_folio_and_exit:
folio_put(folio);
if (ret)
break;
}
......@@ -655,3 +699,4 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
return ret && !i ? ret : i;
}
EXPORT_SYMBOL_GPL(kvm_gmem_populate);
#endif
......@@ -2398,48 +2398,47 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
static u64 kvm_supported_mem_attributes(struct kvm *kvm)
{
	u64 supported = 0;

	/*
	 * KVM_MEMORY_ATTRIBUTE_PRIVATE is reported when no VM is given
	 * (@kvm is NULL) or when the architecture says this VM has private
	 * memory; otherwise no attributes are supported.
	 */
	if (!kvm || kvm_arch_has_private_mem(kvm))
		supported = KVM_MEMORY_ATTRIBUTE_PRIVATE;

	return supported;
}
/*
* Returns true if _all_ gfns in the range [@start, @end) have attributes
* matching @attrs.
* such that the bits in @mask match @attrs.
*/
bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
unsigned long attrs)
unsigned long mask, unsigned long attrs)
{
XA_STATE(xas, &kvm->mem_attr_array, start);
unsigned long index;
bool has_attrs;
void *entry;
rcu_read_lock();
mask &= kvm_supported_mem_attributes(kvm);
if (attrs & ~mask)
return false;
if (!attrs) {
has_attrs = !xas_find(&xas, end - 1);
goto out;
}
if (end == start + 1)
return (kvm_get_memory_attributes(kvm, start) & mask) == attrs;
guard(rcu)();
if (!attrs)
return !xas_find(&xas, end - 1);
has_attrs = true;
for (index = start; index < end; index++) {
do {
entry = xas_next(&xas);
} while (xas_retry(&xas, entry));
if (xas.xa_index != index || xa_to_value(entry) != attrs) {
has_attrs = false;
break;
}
if (xas.xa_index != index ||
(xa_to_value(entry) & mask) != attrs)
return false;
}
out:
rcu_read_unlock();
return has_attrs;
}
static u64 kvm_supported_mem_attributes(struct kvm *kvm)
{
if (!kvm || kvm_arch_has_private_mem(kvm))
return KVM_MEMORY_ATTRIBUTE_PRIVATE;
return 0;
return true;
}
static __always_inline void kvm_handle_gfn_range(struct kvm *kvm,
......@@ -2534,7 +2533,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
mutex_lock(&kvm->slots_lock);
/* Nothing to do if the entire range has the desired attributes. */
if (kvm_range_has_memory_attributes(kvm, start, end, attributes))
if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes))
goto out_unlock;
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment