Commit 0b90c563 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'hyperv-next-signed-20230902' of...

Merge tag 'hyperv-next-signed-20230902' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv updates from Wei Liu:

 - Support for SEV-SNP guests on Hyper-V (Tianyu Lan)

 - Support for TDX guests on Hyper-V (Dexuan Cui)

 - Use SBRM API in Hyper-V balloon driver (Mitchell Levy)

 - Avoid dereferencing ACPI root object handle in VMBus driver (Maciej
   Szmigiero)

 - A few misecllaneous fixes (Jiapeng Chong, Nathan Chancellor, Saurabh
   Sengar)

* tag 'hyperv-next-signed-20230902' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (24 commits)
  x86/hyperv: Remove duplicate include
  x86/hyperv: Move the code in ivm.c around to avoid unnecessary ifdef's
  x86/hyperv: Remove hv_isolation_type_en_snp
  x86/hyperv: Use TDX GHCI to access some MSRs in a TDX VM with the paravisor
  Drivers: hv: vmbus: Bring the post_msg_page back for TDX VMs with the paravisor
  x86/hyperv: Introduce a global variable hyperv_paravisor_present
  Drivers: hv: vmbus: Support >64 VPs for a fully enlightened TDX/SNP VM
  x86/hyperv: Fix serial console interrupts for fully enlightened TDX guests
  Drivers: hv: vmbus: Support fully enlightened TDX guests
  x86/hyperv: Support hypercalls for fully enlightened TDX guests
  x86/hyperv: Add hv_isolation_type_tdx() to detect TDX guests
  x86/hyperv: Fix undefined reference to isolation_type_en_snp without CONFIG_HYPERV
  x86/hyperv: Add missing 'inline' to hv_snp_boot_ap() stub
  hv: hyperv.h: Replace one-element array with flexible-array member
  Drivers: hv: vmbus: Don't dereference ACPI root object handle
  x86/hyperv: Add hyperv-specific handling for VMMCALL under SEV-ES
  x86/hyperv: Add smp support for SEV-SNP guest
  clocksource: hyper-v: Mark hyperv tsc page unencrypted in sev-snp enlightened guest
  x86/hyperv: Use vmmcall to implement Hyper-V hypercall in sev-snp enlightened guest
  drivers: hv: Mark percpu hvcall input arg page unencrypted in SEV-SNP enlightened guest
  ...
parents e4f1b820 284930a0
......@@ -175,8 +175,11 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
(exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask)))
return true;
if (!hv_hypercall_pg)
return false;
/* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */
if (!hv_hypercall_pg) {
if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx())
return false;
}
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
return false;
......@@ -229,9 +232,15 @@ static bool __send_ipi_one(int cpu, int vector)
trace_hyperv_send_ipi_one(cpu, vector);
if (!hv_hypercall_pg || (vp == VP_INVAL))
if (vp == VP_INVAL)
return false;
/* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */
if (!hv_hypercall_pg) {
if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx())
return false;
}
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
return false;
......
......@@ -19,6 +19,7 @@
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <asm/idtentry.h>
#include <asm/set_memory.h>
#include <linux/kexec.h>
#include <linux/version.h>
#include <linux/vmalloc.h>
......@@ -52,7 +53,7 @@ static int hyperv_init_ghcb(void)
void *ghcb_va;
void **ghcb_base;
if (!hv_isolation_type_snp())
if (!ms_hyperv.paravisor_present || !hv_isolation_type_snp())
return 0;
if (!hv_ghcb_pg)
......@@ -80,7 +81,7 @@ static int hyperv_init_ghcb(void)
static int hv_cpu_init(unsigned int cpu)
{
union hv_vp_assist_msr_contents msr = { 0 };
struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
struct hv_vp_assist_page **hvp;
int ret;
ret = hv_common_cpu_init(cpu);
......@@ -90,6 +91,7 @@ static int hv_cpu_init(unsigned int cpu)
if (!hv_vp_assist_page)
return 0;
hvp = &hv_vp_assist_page[cpu];
if (hv_root_partition) {
/*
* For root partition we get the hypervisor provided VP assist
......@@ -107,8 +109,21 @@ static int hv_cpu_init(unsigned int cpu)
* in hv_cpu_die(), otherwise a CPU may not be stopped in the
* case of CPU offlining and the VM will hang.
*/
if (!*hvp)
if (!*hvp) {
*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
/*
* Hyper-V should never specify a VM that is a Confidential
* VM and also running in the root partition. Root partition
* is blocked to run in Confidential VM. So only decrypt assist
* page in non-root partition here.
*/
if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1));
memset(*hvp, 0, PAGE_SIZE);
}
}
if (*hvp)
msr.pfn = vmalloc_to_pfn(*hvp);
......@@ -379,6 +394,36 @@ static void __init hv_get_partition_id(void)
local_irq_restore(flags);
}
static u8 __init get_vtl(void)
{
u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS;
struct hv_get_vp_registers_input *input;
struct hv_get_vp_registers_output *output;
unsigned long flags;
u64 ret;
local_irq_save(flags);
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
output = (struct hv_get_vp_registers_output *)input;
memset(input, 0, struct_size(input, element, 1));
input->header.partitionid = HV_PARTITION_ID_SELF;
input->header.vpindex = HV_VP_INDEX_SELF;
input->header.inputvtl = 0;
input->element[0].name0 = HV_X64_REGISTER_VSM_VP_STATUS;
ret = hv_do_hypercall(control, input, output);
if (hv_result_success(ret)) {
ret = output->as64.low & HV_X64_VTL_MASK;
} else {
pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret);
ret = 0;
}
local_irq_restore(flags);
return ret;
}
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
......@@ -399,14 +444,24 @@ void __init hyperv_init(void)
if (hv_common_init())
return;
hv_vp_assist_page = kcalloc(num_possible_cpus(),
sizeof(*hv_vp_assist_page), GFP_KERNEL);
/*
* The VP assist page is useless to a TDX guest: the only use we
* would have for it is lazy EOI, which can not be used with TDX.
*/
if (hv_isolation_type_tdx())
hv_vp_assist_page = NULL;
else
hv_vp_assist_page = kcalloc(num_possible_cpus(),
sizeof(*hv_vp_assist_page),
GFP_KERNEL);
if (!hv_vp_assist_page) {
ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
goto common_free;
if (!hv_isolation_type_tdx())
goto common_free;
}
if (hv_isolation_type_snp()) {
if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
/* Negotiate GHCB Version. */
if (!hv_ghcb_negotiate_protocol())
hv_ghcb_terminate(SEV_TERM_SET_GEN,
......@@ -426,12 +481,32 @@ void __init hyperv_init(void)
* Setup the hypercall page and enable hypercalls.
* 1. Register the guest ID
* 2. Enable the hypercall and register the hypercall page
*
* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg:
* when the hypercall input is a page, such a VM must pass a decrypted
* page to Hyper-V, e.g. hv_post_message() uses the per-CPU page
* hyperv_pcpu_input_arg, which is decrypted if no paravisor is present.
*
* A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls,
* which are handled by the paravisor and the VM must use an encrypted
* input page: in such a VM, the hyperv_pcpu_input_arg is encrypted and
* used in the hypercalls, e.g. see hv_mark_gpa_visibility() and
* hv_arch_irq_unmask(). Such a VM uses TDX GHCI for two hypercalls:
* 1. HVCALL_SIGNAL_EVENT: see vmbus_set_event() and _hv_do_fast_hypercall8().
* 2. HVCALL_POST_MESSAGE: the input page must be a decrypted page, i.e.
* hv_post_message() in such a VM can't use the encrypted hyperv_pcpu_input_arg;
* instead, hv_post_message() uses the post_msg_page, which is decrypted
* in such a VM and is only used in such a VM.
*/
guest_id = hv_generate_guest_id(LINUX_VERSION_CODE);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
/* With the paravisor, the VM must also write the ID via GHCB/GHCI */
hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
/* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */
if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present)
goto skip_hypercall_pg_init;
hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
......@@ -472,6 +547,7 @@ void __init hyperv_init(void)
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
}
skip_hypercall_pg_init:
/*
* Some versions of Hyper-V that provide IBT in guest VMs have a bug
* in that there's no ENDBR64 instruction at the entry to the
......@@ -527,11 +603,15 @@ void __init hyperv_init(void)
/* Query the VMs extended capability once, so that it can be cached. */
hv_query_ext_cap(0);
/* Find the VTL */
if (!ms_hyperv.paravisor_present && hv_isolation_type_snp())
ms_hyperv.vtl = get_vtl();
return;
clean_guest_os_id:
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
cpuhp_remove_state(cpuhp);
free_ghcb_page:
free_percpu(hv_ghcb_pg);
......@@ -552,7 +632,7 @@ void hyperv_cleanup(void)
/* Reset our OS id */
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
/*
* Reset hypercall page reference before reset the page,
......@@ -615,6 +695,9 @@ bool hv_is_hyperv_initialized(void)
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return false;
/* A TDX VM with no paravisor uses TDX GHCI call rather than hv_hypercall_pg */
if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present)
return true;
/*
* Verify that earlier initialization succeeded by checking
* that the hypercall page is setup
......
......@@ -18,6 +18,11 @@
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/mtrr.h>
#include <asm/io_apic.h>
#include <asm/realmode.h>
#include <asm/e820/api.h>
#include <asm/desc.h>
#include <uapi/asm/vmx.h>
#ifdef CONFIG_AMD_MEM_ENCRYPT
......@@ -56,8 +61,10 @@ union hv_ghcb {
} hypercall;
} __packed __aligned(HV_HYP_PAGE_SIZE);
/* Only used in an SNP VM with the paravisor */
static u16 hv_ghcb_version __ro_after_init;
/* Functions only used in an SNP VM with the paravisor go here. */
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
{
union hv_ghcb *hv_ghcb;
......@@ -175,7 +182,7 @@ bool hv_ghcb_negotiate_protocol(void)
return true;
}
void hv_ghcb_msr_write(u64 msr, u64 value)
static void hv_ghcb_msr_write(u64 msr, u64 value)
{
union hv_ghcb *hv_ghcb;
void **ghcb_base;
......@@ -203,9 +210,8 @@ void hv_ghcb_msr_write(u64 msr, u64 value)
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(hv_ghcb_msr_write);
void hv_ghcb_msr_read(u64 msr, u64 *value)
static void hv_ghcb_msr_read(u64 msr, u64 *value)
{
union hv_ghcb *hv_ghcb;
void **ghcb_base;
......@@ -235,7 +241,217 @@ void hv_ghcb_msr_read(u64 msr, u64 *value)
| ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(hv_ghcb_msr_read);
/* Only used in a fully enlightened SNP VM, i.e. without the paravisor */
static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE);
static DEFINE_PER_CPU(struct sev_es_save_area *, hv_sev_vmsa);
/* Functions only used in an SNP VM without the paravisor go here. */
#define hv_populate_vmcb_seg(seg, gdtr_base) \
do { \
if (seg.selector) { \
seg.base = 0; \
seg.limit = HV_AP_SEGMENT_LIMIT; \
seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \
seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \
} \
} while (0) \
static int snp_set_vmsa(void *va, bool vmsa)
{
u64 attrs;
/*
* Running at VMPL0 allows the kernel to change the VMSA bit for a page
* using the RMPADJUST instruction. However, for the instruction to
* succeed it must target the permissions of a lesser privileged
* (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
* instruction in the AMD64 APM Volume 3).
*/
attrs = 1;
if (vmsa)
attrs |= RMPADJUST_VMSA_PAGE_BIT;
return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
}
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
{
int err;
err = snp_set_vmsa(vmsa, false);
if (err)
pr_err("clear VMSA page failed (%u), leaking page\n", err);
else
free_page((unsigned long)vmsa);
}
int hv_snp_boot_ap(int cpu, unsigned long start_ip)
{
struct sev_es_save_area *vmsa = (struct sev_es_save_area *)
__get_free_page(GFP_KERNEL | __GFP_ZERO);
struct sev_es_save_area *cur_vmsa;
struct desc_ptr gdtr;
u64 ret, retry = 5;
struct hv_enable_vp_vtl *start_vp_input;
unsigned long flags;
if (!vmsa)
return -ENOMEM;
native_store_gdt(&gdtr);
vmsa->gdtr.base = gdtr.address;
vmsa->gdtr.limit = gdtr.size;
asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);
asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);
asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);
asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);
vmsa->efer = native_read_msr(MSR_EFER);
asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4));
asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3));
asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0));
vmsa->xcr0 = 1;
vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT;
vmsa->rip = (u64)secondary_startup_64_no_verify;
vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE];
/*
* Set the SNP-specific fields for this VMSA:
* VMPL level
* SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
*/
vmsa->vmpl = 0;
vmsa->sev_features = sev_status >> 2;
ret = snp_set_vmsa(vmsa, true);
if (!ret) {
pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
free_page((u64)vmsa);
return ret;
}
local_irq_save(flags);
start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg;
memset(start_vp_input, 0, sizeof(*start_vp_input));
start_vp_input->partition_id = -1;
start_vp_input->vp_index = cpu;
start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl;
*(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1;
do {
ret = hv_do_hypercall(HVCALL_START_VP,
start_vp_input, NULL);
} while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--);
local_irq_restore(flags);
if (!hv_result_success(ret)) {
pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret);
snp_cleanup_vmsa(vmsa);
vmsa = NULL;
}
cur_vmsa = per_cpu(hv_sev_vmsa, cpu);
/* Free up any previous VMSA page */
if (cur_vmsa)
snp_cleanup_vmsa(cur_vmsa);
/* Record the current VMSA page */
per_cpu(hv_sev_vmsa, cpu) = vmsa;
return ret;
}
#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
#endif /* CONFIG_AMD_MEM_ENCRYPT */
#ifdef CONFIG_INTEL_TDX_GUEST
static void hv_tdx_msr_write(u64 msr, u64 val)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
.r11 = EXIT_REASON_MSR_WRITE,
.r12 = msr,
.r13 = val,
};
u64 ret = __tdx_hypercall(&args);
WARN_ONCE(ret, "Failed to emulate MSR write: %lld\n", ret);
}
static void hv_tdx_msr_read(u64 msr, u64 *val)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
.r11 = EXIT_REASON_MSR_READ,
.r12 = msr,
};
u64 ret = __tdx_hypercall_ret(&args);
if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret))
*val = 0;
else
*val = args.r11;
}
u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
{
struct tdx_hypercall_args args = { };
args.r10 = control;
args.rdx = param1;
args.r8 = param2;
(void)__tdx_hypercall_ret(&args);
return args.r11;
}
#else
static inline void hv_tdx_msr_write(u64 msr, u64 value) {}
static inline void hv_tdx_msr_read(u64 msr, u64 *value) {}
#endif /* CONFIG_INTEL_TDX_GUEST */
#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
void hv_ivm_msr_write(u64 msr, u64 value)
{
if (!ms_hyperv.paravisor_present)
return;
if (hv_isolation_type_tdx())
hv_tdx_msr_write(msr, value);
else if (hv_isolation_type_snp())
hv_ghcb_msr_write(msr, value);
}
void hv_ivm_msr_read(u64 msr, u64 *value)
{
if (!ms_hyperv.paravisor_present)
return;
if (hv_isolation_type_tdx())
hv_tdx_msr_read(msr, value);
else if (hv_isolation_type_snp())
hv_ghcb_msr_read(msr, value);
}
/*
* hv_mark_gpa_visibility - Set pages visible to host via hvcall.
......@@ -358,13 +574,34 @@ static bool hv_is_private_mmio(u64 addr)
void __init hv_vtom_init(void)
{
enum hv_isolation_type type = hv_get_isolation_type();
switch (type) {
case HV_ISOLATION_TYPE_VBS:
fallthrough;
/*
* By design, a VM using vTOM doesn't see the SEV setting,
* so SEV initialization is bypassed and sev_status isn't set.
* Set it here to indicate a vTOM VM.
*
* Note: if CONFIG_AMD_MEM_ENCRYPT is not set, sev_status is
* defined as 0ULL, to which we can't assigned a value.
*/
sev_status = MSR_AMD64_SNP_VTOM;
cc_vendor = CC_VENDOR_AMD;
#ifdef CONFIG_AMD_MEM_ENCRYPT
case HV_ISOLATION_TYPE_SNP:
sev_status = MSR_AMD64_SNP_VTOM;
cc_vendor = CC_VENDOR_AMD;
break;
#endif
case HV_ISOLATION_TYPE_TDX:
cc_vendor = CC_VENDOR_INTEL;
break;
default:
panic("hv_vtom_init: unsupported isolation type %d\n", type);
}
cc_set_mask(ms_hyperv.shared_gpa_boundary);
physical_mask &= ms_hyperv.shared_gpa_boundary - 1;
......@@ -377,7 +614,7 @@ void __init hv_vtom_init(void)
mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
}
#endif /* CONFIG_AMD_MEM_ENCRYPT */
#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */
enum hv_isolation_type hv_get_isolation_type(void)
{
......@@ -405,10 +642,20 @@ bool hv_is_isolation_supported(void)
DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
/*
* hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based
* hv_isolation_type_snp - Check if the system runs in an AMD SEV-SNP based
* isolation VM.
*/
bool hv_isolation_type_snp(void)
{
return static_branch_unlikely(&isolation_type_snp);
}
DEFINE_STATIC_KEY_FALSE(isolation_type_tdx);
/*
* hv_isolation_type_tdx - Check if the system runs in an Intel TDX based
* isolated VM.
*/
bool hv_isolation_type_tdx(void)
{
return static_branch_unlikely(&isolation_type_tdx);
}
......@@ -169,7 +169,8 @@
enum hv_isolation_type {
HV_ISOLATION_TYPE_NONE = 0,
HV_ISOLATION_TYPE_VBS = 1,
HV_ISOLATION_TYPE_SNP = 2
HV_ISOLATION_TYPE_SNP = 2,
HV_ISOLATION_TYPE_TDX = 3
};
/* Hyper-V specific model specific registers (MSRs) */
......@@ -301,6 +302,13 @@ enum hv_isolation_type {
#define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT
#define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC
/*
* Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and
* there is not associated MSR address.
*/
#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003
#define HV_X64_VTL_MASK GENMASK(3, 0)
/* Hyper-V memory host visibility */
enum hv_mem_host_visibility {
VMBUS_PAGE_NOT_VISIBLE = 0,
......
......@@ -26,6 +26,7 @@
union hv_ghcb;
DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
DECLARE_STATIC_KEY_FALSE(isolation_type_tdx);
typedef int (*hyperv_fill_flush_list_func)(
struct hv_guest_mapping_flush_list *flush,
......@@ -40,6 +41,7 @@ static inline unsigned char hv_get_nmi_reason(void)
#if IS_ENABLED(CONFIG_HYPERV)
extern int hyperv_init_cpuhp;
extern bool hyperv_paravisor_present;
extern void *hv_hypercall_pg;
......@@ -47,10 +49,25 @@ extern u64 hv_current_partition_id;
extern union hv_ghcb * __percpu *hv_ghcb_pg;
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
/*
* DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
* to start AP in enlightened SEV guest.
*/
#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
#define HV_AP_SEGMENT_LIMIT 0xffffffff
int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
/*
* If the hypercall involves no input or output parameters, the hypervisor
* ignores the corresponding GPA pointer.
*/
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
......@@ -58,6 +75,19 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 hv_status;
#ifdef CONFIG_X86_64
if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
return hv_tdx_hypercall(control, input_address, output_address);
if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
__asm__ __volatile__("mov %4, %%r8\n"
"vmmcall"
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
: "r" (output_address)
: "cc", "memory", "r8", "r9", "r10", "r11");
return hv_status;
}
if (!hv_hypercall_pg)
return U64_MAX;
......@@ -101,7 +131,16 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
u64 hv_status;
#ifdef CONFIG_X86_64
{
if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
return hv_tdx_hypercall(control, input1, 0);
if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
__asm__ __volatile__(
"vmmcall"
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
:: "cc", "r8", "r9", "r10", "r11");
} else {
__asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
......@@ -146,7 +185,17 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
u64 hv_status;
#ifdef CONFIG_X86_64
{
if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
return hv_tdx_hypercall(control, input1, input2);
if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
__asm__ __volatile__("mov %4, %%r8\n"
"vmmcall"
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
: "r" (input2)
: "cc", "r8", "r9", "r10", "r11");
} else {
__asm__ __volatile__("mov %4, %%r8\n"
CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
......@@ -225,20 +274,24 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
#ifdef CONFIG_AMD_MEM_ENCRYPT
void hv_ghcb_msr_write(u64 msr, u64 value);
void hv_ghcb_msr_read(u64 msr, u64 *value);
bool hv_ghcb_negotiate_protocol(void);
void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason);
void hv_vtom_init(void);
int hv_snp_boot_ap(int cpu, unsigned long start_ip);
#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
static inline void hv_vtom_init(void) {}
static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; }
#endif
extern bool hv_isolation_type_snp(void);
#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
void hv_vtom_init(void);
void hv_ivm_msr_write(u64 msr, u64 value);
void hv_ivm_msr_read(u64 msr, u64 *value);
#else
static inline void hv_vtom_init(void) {}
static inline void hv_ivm_msr_write(u64 msr, u64 value) {}
static inline void hv_ivm_msr_read(u64 msr, u64 *value) {}
#endif
static inline bool hv_is_synic_reg(unsigned int reg)
{
......
......@@ -32,6 +32,7 @@
#include <asm/nmi.h>
#include <clocksource/hyperv_timer.h>
#include <asm/numa.h>
#include <asm/svm.h>
/* Is Linux running as the root partition? */
bool hv_root_partition;
......@@ -39,6 +40,10 @@ bool hv_root_partition;
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */
bool hyperv_paravisor_present __ro_after_init;
EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
#if IS_ENABLED(CONFIG_HYPERV)
static inline unsigned int hv_get_nested_reg(unsigned int reg)
{
......@@ -65,8 +70,8 @@ u64 hv_get_non_nested_register(unsigned int reg)
{
u64 value;
if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
hv_ghcb_msr_read(reg, &value);
if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present)
hv_ivm_msr_read(reg, &value);
else
rdmsrl(reg, value);
return value;
......@@ -75,8 +80,8 @@ EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
void hv_set_non_nested_register(unsigned int reg, u64 value)
{
if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
hv_ghcb_msr_write(reg, value);
if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) {
hv_ivm_msr_write(reg, value);
/* Write proxy bit via wrmsl instruction */
if (hv_is_sint_reg(reg))
......@@ -295,6 +300,15 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
native_smp_prepare_cpus(max_cpus);
/*
* Override wakeup_secondary_cpu_64 callback for SEV-SNP
* enlightened guest.
*/
if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap;
return;
}
#ifdef CONFIG_X86_64
for_each_present_cpu(i) {
if (i == 0)
......@@ -313,6 +327,26 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
}
#endif
/*
* When a fully enlightened TDX VM runs on Hyper-V, the firmware sets the
* HW_REDUCED flag: refer to acpi_tb_create_local_fadt(). Consequently ttyS0
* interrupts can't work because request_irq() -> ... -> irq_to_desc() returns
* NULL for ttyS0. This happens because mp_config_acpi_legacy_irqs() sees a
* nr_legacy_irqs() of 0, so it doesn't initialize the array 'mp_irqs[]', and
* later setup_IO_APIC_irqs() -> find_irq_entry() fails to find the legacy irqs
* from the array and hence doesn't create the necessary irq description info.
*
* Clone arch/x86/kernel/acpi/boot.c: acpi_generic_reduced_hw_init() here,
* except don't change 'legacy_pic', which keeps its default value
* 'default_legacy_pic'. This way, mp_config_acpi_legacy_irqs() sees a non-zero
* nr_legacy_irqs() and eventually serial console interrupts works properly.
*/
static void __init reduced_hw_init(void)
{
x86_init.timers.timer_init = x86_init_noop;
x86_init.irqs.pre_vector_init = x86_init_noop;
}
static void __init ms_hyperv_init_platform(void)
{
int hv_max_functions_eax;
......@@ -399,11 +433,33 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.shared_gpa_boundary =
BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
static_branch_enable(&isolation_type_snp);
} else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
static_branch_enable(&isolation_type_tdx);
/* A TDX VM must use x2APIC and doesn't use lazy EOI. */
ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
if (!ms_hyperv.paravisor_present) {
/* To be supported: more work is required. */
ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE;
/* HV_REGISTER_CRASH_CTL is unsupported. */
ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
/* Don't trust Hyper-V's TLB-flushing hypercalls. */
ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
x86_init.acpi.reduced_hw_early_init = reduced_hw_init;
}
}
}
if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
......@@ -473,7 +529,7 @@ static void __init ms_hyperv_init_platform(void)
#if IS_ENABLED(CONFIG_HYPERV)
if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
(hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP))
ms_hyperv.paravisor_present)
hv_vtom_init();
/*
* Setup the hook to get control post apic initialization.
......@@ -497,7 +553,8 @@ static void __init ms_hyperv_init_platform(void)
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
if (hv_root_partition)
if (hv_root_partition ||
(!ms_hyperv.paravisor_present && hv_isolation_type_snp()))
smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
# endif
......@@ -560,6 +617,22 @@ static bool __init ms_hyperv_msi_ext_dest_id(void)
return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE;
}
#ifdef CONFIG_AMD_MEM_ENCRYPT
static void hv_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
{
/* RAX and CPL are already in the GHCB */
ghcb_set_rcx(ghcb, regs->cx);
ghcb_set_rdx(ghcb, regs->dx);
ghcb_set_r8(ghcb, regs->r8);
}
static bool hv_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
/* No checking of the return state needed */
return true;
}
#endif
const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
.name = "Microsoft Hyper-V",
.detect = ms_hyperv_platform,
......@@ -567,4 +640,8 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
.init.x2apic_available = ms_hyperv_x2apic_available,
.init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id,
.init.init_platform = ms_hyperv_init_platform,
#ifdef CONFIG_AMD_MEM_ENCRYPT
.runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare,
.runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish,
#endif
};
......@@ -390,7 +390,7 @@ static __always_inline u64 read_hv_clock_msr(void)
static union {
struct ms_hyperv_tsc_page page;
u8 reserved[PAGE_SIZE];
} tsc_pg __aligned(PAGE_SIZE);
} tsc_pg __bss_decrypted __aligned(PAGE_SIZE);
static struct ms_hyperv_tsc_page *tsc_page = &tsc_pg.page;
static unsigned long tsc_pfn;
......
......@@ -98,6 +98,7 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
*/
if (version >= VERSION_WIN10_V5) {
msg->msg_sint = VMBUS_MESSAGE_SINT;
msg->msg_vtl = ms_hyperv.vtl;
vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4;
} else {
msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
......@@ -482,10 +483,17 @@ void vmbus_set_event(struct vmbus_channel *channel)
++channel->sig_events;
if (hv_isolation_type_snp())
hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
NULL, sizeof(channel->sig_event));
else
if (ms_hyperv.paravisor_present) {
if (hv_isolation_type_snp())
hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
NULL, sizeof(channel->sig_event));
else if (hv_isolation_type_tdx())
hv_tdx_hypercall(HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT,
channel->sig_event, 0);
else
WARN_ON_ONCE(1);
} else {
hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
}
}
EXPORT_SYMBOL_GPL(vmbus_set_event);
......@@ -20,6 +20,7 @@
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include <linux/set_memory.h>
#include "hyperv_vmbus.h"
/* The one and only */
......@@ -56,20 +57,37 @@ int hv_post_message(union hv_connection_id connection_id,
local_irq_save(flags);
aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
/*
* A TDX VM with the paravisor must use the decrypted post_msg_page: see
* the comment in struct hv_per_cpu_context. A SNP VM with the paravisor
* can use the encrypted hyperv_pcpu_input_arg because it copies the
* input into the GHCB page, which has been decrypted by the paravisor.
*/
if (hv_isolation_type_tdx() && ms_hyperv.paravisor_present)
aligned_msg = this_cpu_ptr(hv_context.cpu_context)->post_msg_page;
else
aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
aligned_msg->connectionid = connection_id;
aligned_msg->reserved = 0;
aligned_msg->message_type = message_type;
aligned_msg->payload_size = payload_size;
memcpy((void *)aligned_msg->payload, payload, payload_size);
if (hv_isolation_type_snp())
status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
(void *)aligned_msg, NULL,
sizeof(*aligned_msg));
else
if (ms_hyperv.paravisor_present) {
if (hv_isolation_type_tdx())
status = hv_tdx_hypercall(HVCALL_POST_MESSAGE,
virt_to_phys(aligned_msg), 0);
else if (hv_isolation_type_snp())
status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
aligned_msg, NULL,
sizeof(*aligned_msg));
else
status = HV_STATUS_INVALID_PARAMETER;
} else {
status = hv_do_hypercall(HVCALL_POST_MESSAGE,
aligned_msg, NULL);
}
local_irq_restore(flags);
......@@ -78,7 +96,7 @@ int hv_post_message(union hv_connection_id connection_id,
int hv_synic_alloc(void)
{
int cpu;
int cpu, ret = -ENOMEM;
struct hv_per_cpu_context *hv_cpu;
/*
......@@ -104,11 +122,29 @@ int hv_synic_alloc(void)
tasklet_init(&hv_cpu->msg_dpc,
vmbus_on_msg_dpc, (unsigned long) hv_cpu);
if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->post_msg_page == NULL) {
pr_err("Unable to allocate post msg page\n");
goto err;
}
ret = set_memory_decrypted((unsigned long)hv_cpu->post_msg_page, 1);
if (ret) {
pr_err("Failed to decrypt post msg page: %d\n", ret);
/* Just leak the page, as it's unsafe to free the page. */
hv_cpu->post_msg_page = NULL;
goto err;
}
memset(hv_cpu->post_msg_page, 0, PAGE_SIZE);
}
/*
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
if (!hv_isolation_type_snp() && !hv_root_partition) {
if (!ms_hyperv.paravisor_present && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
......@@ -120,29 +156,96 @@ int hv_synic_alloc(void)
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_event_page == NULL) {
pr_err("Unable to allocate SYNIC event page\n");
free_page((unsigned long)hv_cpu->synic_message_page);
hv_cpu->synic_message_page = NULL;
goto err;
}
}
if (!ms_hyperv.paravisor_present &&
(hv_isolation_type_snp() || hv_isolation_type_tdx())) {
ret = set_memory_decrypted((unsigned long)
hv_cpu->synic_message_page, 1);
if (ret) {
pr_err("Failed to decrypt SYNIC msg page: %d\n", ret);
hv_cpu->synic_message_page = NULL;
/*
* Free the event page here so that hv_synic_free()
* won't later try to re-encrypt it.
*/
free_page((unsigned long)hv_cpu->synic_event_page);
hv_cpu->synic_event_page = NULL;
goto err;
}
ret = set_memory_decrypted((unsigned long)
hv_cpu->synic_event_page, 1);
if (ret) {
pr_err("Failed to decrypt SYNIC event page: %d\n", ret);
hv_cpu->synic_event_page = NULL;
goto err;
}
memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
}
}
return 0;
err:
/*
* Any memory allocations that succeeded will be freed when
* the caller cleans up by calling hv_synic_free()
*/
return -ENOMEM;
return ret;
}
void hv_synic_free(void)
{
int cpu;
int cpu, ret;
for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
/* It's better to leak the page if the encryption fails. */
if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
if (hv_cpu->post_msg_page) {
ret = set_memory_encrypted((unsigned long)
hv_cpu->post_msg_page, 1);
if (ret) {
pr_err("Failed to encrypt post msg page: %d\n", ret);
hv_cpu->post_msg_page = NULL;
}
}
}
if (!ms_hyperv.paravisor_present &&
(hv_isolation_type_snp() || hv_isolation_type_tdx())) {
if (hv_cpu->synic_message_page) {
ret = set_memory_encrypted((unsigned long)
hv_cpu->synic_message_page, 1);
if (ret) {
pr_err("Failed to encrypt SYNIC msg page: %d\n", ret);
hv_cpu->synic_message_page = NULL;
}
}
if (hv_cpu->synic_event_page) {
ret = set_memory_encrypted((unsigned long)
hv_cpu->synic_event_page, 1);
if (ret) {
pr_err("Failed to encrypt SYNIC event page: %d\n", ret);
hv_cpu->synic_event_page = NULL;
}
}
}
free_page((unsigned long)hv_cpu->post_msg_page);
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
}
......@@ -170,7 +273,7 @@ void hv_synic_enable_regs(unsigned int cpu)
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
simp.simp_enabled = 1;
if (hv_isolation_type_snp() || hv_root_partition) {
if (ms_hyperv.paravisor_present || hv_root_partition) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
......@@ -189,7 +292,7 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 1;
if (hv_isolation_type_snp() || hv_root_partition) {
if (ms_hyperv.paravisor_present || hv_root_partition) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
......@@ -272,7 +375,7 @@ void hv_synic_disable_regs(unsigned int cpu)
* addresses.
*/
simp.simp_enabled = 0;
if (hv_isolation_type_snp() || hv_root_partition) {
if (ms_hyperv.paravisor_present || hv_root_partition) {
iounmap(hv_cpu->synic_message_page);
hv_cpu->synic_message_page = NULL;
} else {
......@@ -284,7 +387,7 @@ void hv_synic_disable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 0;
if (hv_isolation_type_snp() || hv_root_partition) {
if (ms_hyperv.paravisor_present || hv_root_partition) {
iounmap(hv_cpu->synic_event_page);
hv_cpu->synic_event_page = NULL;
} else {
......
......@@ -8,6 +8,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cleanup.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mman.h>
......@@ -646,7 +647,7 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
void *v)
{
struct memory_notify *mem = (struct memory_notify *)v;
unsigned long flags, pfn_count;
unsigned long pfn_count;
switch (val) {
case MEM_ONLINE:
......@@ -655,21 +656,22 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
break;
case MEM_OFFLINE:
spin_lock_irqsave(&dm_device.ha_lock, flags);
pfn_count = hv_page_offline_check(mem->start_pfn,
mem->nr_pages);
if (pfn_count <= dm_device.num_pages_onlined) {
dm_device.num_pages_onlined -= pfn_count;
} else {
/*
* We're offlining more pages than we managed to online.
* This is unexpected. In any case don't let
* num_pages_onlined wrap around zero.
*/
WARN_ON_ONCE(1);
dm_device.num_pages_onlined = 0;
scoped_guard(spinlock_irqsave, &dm_device.ha_lock) {
pfn_count = hv_page_offline_check(mem->start_pfn,
mem->nr_pages);
if (pfn_count <= dm_device.num_pages_onlined) {
dm_device.num_pages_onlined -= pfn_count;
} else {
/*
* We're offlining more pages than we
* managed to online. This is
* unexpected. In any case don't let
* num_pages_onlined wrap around zero.
*/
WARN_ON_ONCE(1);
dm_device.num_pages_onlined = 0;
}
}
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
break;
case MEM_GOING_ONLINE:
case MEM_GOING_OFFLINE:
......@@ -721,24 +723,23 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
unsigned long start_pfn;
unsigned long processed_pfn;
unsigned long total_pfn = pfn_count;
unsigned long flags;
for (i = 0; i < (size/HA_CHUNK); i++) {
start_pfn = start + (i * HA_CHUNK);
spin_lock_irqsave(&dm_device.ha_lock, flags);
has->ha_end_pfn += HA_CHUNK;
scoped_guard(spinlock_irqsave, &dm_device.ha_lock) {
has->ha_end_pfn += HA_CHUNK;
if (total_pfn > HA_CHUNK) {
processed_pfn = HA_CHUNK;
total_pfn -= HA_CHUNK;
} else {
processed_pfn = total_pfn;
total_pfn = 0;
}
if (total_pfn > HA_CHUNK) {
processed_pfn = HA_CHUNK;
total_pfn -= HA_CHUNK;
} else {
processed_pfn = total_pfn;
total_pfn = 0;
}
has->covered_end_pfn += processed_pfn;
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
has->covered_end_pfn += processed_pfn;
}
reinit_completion(&dm_device.ol_waitevent);
......@@ -758,10 +759,10 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
*/
do_hot_add = false;
}
spin_lock_irqsave(&dm_device.ha_lock, flags);
has->ha_end_pfn -= HA_CHUNK;
has->covered_end_pfn -= processed_pfn;
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
scoped_guard(spinlock_irqsave, &dm_device.ha_lock) {
has->ha_end_pfn -= HA_CHUNK;
has->covered_end_pfn -= processed_pfn;
}
break;
}
......@@ -781,10 +782,9 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
static void hv_online_page(struct page *pg, unsigned int order)
{
struct hv_hotadd_state *has;
unsigned long flags;
unsigned long pfn = page_to_pfn(pg);
spin_lock_irqsave(&dm_device.ha_lock, flags);
guard(spinlock_irqsave)(&dm_device.ha_lock);
list_for_each_entry(has, &dm_device.ha_region_list, list) {
/* The page belongs to a different HAS. */
if ((pfn < has->start_pfn) ||
......@@ -794,7 +794,6 @@ static void hv_online_page(struct page *pg, unsigned int order)
hv_bring_pgs_online(has, pfn, 1UL << order);
break;
}
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
}
static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
......@@ -803,9 +802,8 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
struct hv_hotadd_gap *gap;
unsigned long residual, new_inc;
int ret = 0;
unsigned long flags;
spin_lock_irqsave(&dm_device.ha_lock, flags);
guard(spinlock_irqsave)(&dm_device.ha_lock);
list_for_each_entry(has, &dm_device.ha_region_list, list) {
/*
* If the pfn range we are dealing with is not in the current
......@@ -852,7 +850,6 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
ret = 1;
break;
}
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
return ret;
}
......@@ -947,7 +944,6 @@ static unsigned long process_hot_add(unsigned long pg_start,
{
struct hv_hotadd_state *ha_region = NULL;
int covered;
unsigned long flags;
if (pfn_cnt == 0)
return 0;
......@@ -979,9 +975,9 @@ static unsigned long process_hot_add(unsigned long pg_start,
ha_region->covered_end_pfn = pg_start;
ha_region->end_pfn = rg_start + rg_size;
spin_lock_irqsave(&dm_device.ha_lock, flags);
list_add_tail(&ha_region->list, &dm_device.ha_region_list);
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
scoped_guard(spinlock_irqsave, &dm_device.ha_lock) {
list_add_tail(&ha_region->list, &dm_device.ha_region_list);
}
}
do_pg_range:
......@@ -2047,7 +2043,6 @@ static void balloon_remove(struct hv_device *dev)
struct hv_dynmem_device *dm = hv_get_drvdata(dev);
struct hv_hotadd_state *has, *tmp;
struct hv_hotadd_gap *gap, *tmp_gap;
unsigned long flags;
if (dm->num_pages_ballooned != 0)
pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
......@@ -2073,7 +2068,7 @@ static void balloon_remove(struct hv_device *dev)
#endif
}
spin_lock_irqsave(&dm_device.ha_lock, flags);
guard(spinlock_irqsave)(&dm_device.ha_lock);
list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) {
list_del(&gap->list);
......@@ -2082,7 +2077,6 @@ static void balloon_remove(struct hv_device *dev)
list_del(&has->list);
kfree(has);
}
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
}
static int balloon_suspend(struct hv_device *hv_dev)
......
......@@ -24,6 +24,7 @@
#include <linux/kmsg_dump.h>
#include <linux/slab.h>
#include <linux/dma-map-ops.h>
#include <linux/set_memory.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
......@@ -359,6 +360,8 @@ int hv_common_cpu_init(unsigned int cpu)
u64 msr_vp_index;
gfp_t flags;
int pgcount = hv_root_partition ? 2 : 1;
void *mem;
int ret;
/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
......@@ -370,14 +373,41 @@ int hv_common_cpu_init(unsigned int cpu)
* allocated if this CPU was previously online and then taken offline
*/
if (!*inputarg) {
*inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
if (!(*inputarg))
mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
if (!mem)
return -ENOMEM;
if (hv_root_partition) {
outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
*outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
}
if (!ms_hyperv.paravisor_present &&
(hv_isolation_type_snp() || hv_isolation_type_tdx())) {
ret = set_memory_decrypted((unsigned long)mem, pgcount);
if (ret) {
/* It may be unsafe to free 'mem' */
return ret;
}
memset(mem, 0x00, pgcount * HV_HYP_PAGE_SIZE);
}
/*
* In a fully enlightened TDX/SNP VM with more than 64 VPs, if
* hyperv_pcpu_input_arg is not NULL, set_memory_decrypted() ->
* ... -> cpa_flush()-> ... -> __send_ipi_mask_ex() tries to
* use hyperv_pcpu_input_arg as the hypercall input page, which
* must be a decrypted page in such a VM, but the page is still
* encrypted before set_memory_decrypted() returns. Fix this by
* setting *inputarg after the above set_memory_decrypted(): if
* hyperv_pcpu_input_arg is NULL, __send_ipi_mask_ex() returns
* HV_STATUS_INVALID_PARAMETER immediately, and the function
* hv_send_ipi_mask() falls back to orig_apic.send_IPI_mask(),
* which may be slightly slower than the hypercall, but still
* works correctly in such a VM.
*/
*inputarg = mem;
}
msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
......@@ -502,6 +532,12 @@ bool __weak hv_isolation_type_snp(void)
}
EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
bool __weak hv_isolation_type_tdx(void)
{
return false;
}
EXPORT_SYMBOL_GPL(hv_isolation_type_tdx);
void __weak hv_setup_vmbus_handler(void (*handler)(void))
{
}
......@@ -542,3 +578,9 @@ u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_s
return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
u64 __weak hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
{
return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
......@@ -123,6 +123,17 @@ struct hv_per_cpu_context {
void *synic_message_page;
void *synic_event_page;
/*
* The page is only used in hv_post_message() for a TDX VM (with the
* paravisor) to post a messages to Hyper-V: when such a VM calls
* HVCALL_POST_MESSAGE, it can't use the hyperv_pcpu_input_arg (which
* is encrypted in such a VM) as the hypercall input page, because
* the input page for HVCALL_POST_MESSAGE must be decrypted in such a
* VM, so post_msg_page (which is decrypted in hv_synic_alloc()) is
* introduced for this purpose. See hyperv_init() for more comments.
*/
void *post_msg_page;
/*
* Starting with win8, we can take channel interrupts on any CPU;
* we will manage the tasklet that handles events messages on a per CPU
......
......@@ -2287,7 +2287,8 @@ static int vmbus_acpi_add(struct platform_device *pdev)
* Some ancestor of the vmbus acpi device (Gen1 or Gen2
* firmware) is the VMOD that has the mmio ranges. Get that.
*/
for (ancestor = acpi_dev_parent(device); ancestor;
for (ancestor = acpi_dev_parent(device);
ancestor && ancestor->handle != ACPI_ROOT_OBJECT;
ancestor = acpi_dev_parent(ancestor)) {
result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
vmbus_walk_resources, NULL);
......
......@@ -223,6 +223,7 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_STATUS_INVALID_PORT_ID 17
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
#define HV_STATUS_TIME_OUT 120
#define HV_STATUS_VTL_ALREADY_ENABLED 134
/*
......
......@@ -36,18 +36,25 @@ struct ms_hyperv_info {
u32 nested_features;
u32 max_vp_index;
u32 max_lp_index;
u32 isolation_config_a;
union {
u32 isolation_config_a;
struct {
u32 paravisor_present : 1;
u32 reserved_a1 : 31;
};
};
union {
u32 isolation_config_b;
struct {
u32 cvm_type : 4;
u32 reserved1 : 1;
u32 reserved_b1 : 1;
u32 shared_gpa_boundary_active : 1;
u32 shared_gpa_boundary_bits : 6;
u32 reserved2 : 20;
u32 reserved_b2 : 20;
};
};
u64 shared_gpa_boundary;
u8 vtl;
};
extern struct ms_hyperv_info ms_hyperv;
extern bool hv_nested;
......@@ -57,7 +64,8 @@ extern void * __percpu *hyperv_pcpu_output_arg;
extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
extern bool hv_isolation_type_snp(void);
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
/* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
static inline int hv_result(u64 status)
......@@ -274,6 +282,7 @@ enum hv_isolation_type hv_get_isolation_type(void);
bool hv_is_isolation_supported(void);
bool hv_isolation_type_snp(void);
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
void hyperv_cleanup(void);
bool hv_query_ext_cap(u64 cap_query);
void hv_setup_dma_ops(struct device *dev, bool coherent);
......
......@@ -348,7 +348,7 @@ struct vmtransfer_page_packet_header {
u8 sender_owns_set;
u8 reserved;
u32 range_cnt;
struct vmtransfer_page_range ranges[1];
struct vmtransfer_page_range ranges[];
} __packed;
struct vmgpadl_packet_header {
......@@ -665,8 +665,8 @@ struct vmbus_channel_initiate_contact {
u64 interrupt_page;
struct {
u8 msg_sint;
u8 padding1[3];
u32 padding2;
u8 msg_vtl;
u8 reserved[6];
};
};
u64 monitor_page1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment