Commit 1c46d04a authored by Linus Torvalds

Merge tag 'hyperv-fixes-signed-20240303' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv fixes from Wei Liu:

 - Multiple fixes, cleanups, and documentation updates for Hyper-V core code
   and drivers

* tag 'hyperv-fixes-signed-20240303' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
  Drivers: hv: vmbus: make hv_bus const
  x86/hyperv: Allow 15-bit APIC IDs for VTL platforms
  x86/hyperv: Make encrypted/decrypted changes safe for load_unaligned_zeropad()
  x86/mm: Regularize set_memory_p() parameters and make non-static
  x86/hyperv: Use slow_virt_to_phys() in page transition hypervisor callback
  Documentation: hyperv: Add overview of PCI pass-thru device support
  Drivers: hv: vmbus: Update indentation in create_gpadl_header()
  Drivers: hv: vmbus: Remove duplication and cleanup code in create_gpadl_header()
  fbdev/hyperv_fb: Fix logic error for Gen2 VMs in hvfb_getmem()
  Drivers: hv: vmbus: Calculate ring buffer size for more efficient use of memory
  hv_utils: Allow implicit ICTIMESYNCFLAG_SYNC
parents 90d35da6 aa707b61
@@ -10,3 +10,4 @@ Hyper-V Enlightenments
overview
vmbus
clocks
vpci
@@ -16,6 +16,11 @@
extern struct boot_params boot_params;
static struct real_mode_header hv_vtl_real_mode_header;
static bool __init hv_vtl_msi_ext_dest_id(void)
{
return true;
}
void __init hv_vtl_init_platform(void)
{
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
@@ -38,6 +43,8 @@ void __init hv_vtl_init_platform(void)
x86_platform.legacy.warm_reset = 0;
x86_platform.legacy.reserve_bios_regions = 0;
x86_platform.legacy.devices.pnpbios = 0;
x86_init.hyper.msi_ext_dest_id = hv_vtl_msi_ext_dest_id;
}
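(For reference: the msi_ext_dest_id hook tells the x86 MSI code that the platform supports the extended 15-bit destination ID format, which is what lets VTL platforms target APIC IDs above 255, per the "Allow 15-bit APIC IDs for VTL platforms" patch in the list above.)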
static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc)
@@ -15,6 +15,7 @@
#include <asm/io.h>
#include <asm/coco.h>
#include <asm/mem_encrypt.h>
#include <asm/set_memory.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/mtrr.h>
@@ -502,6 +503,31 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
return -EFAULT;
}
/*
* When transitioning memory between encrypted and decrypted, the caller
* of set_memory_encrypted() or set_memory_decrypted() is responsible for
* ensuring that the memory isn't in use and isn't referenced while the
* transition is in progress. The transition has multiple steps, and the
* memory is in an inconsistent state until all steps are complete. A
* reference while the state is inconsistent could result in an exception
* that can't be cleanly fixed up.
*
* But the Linux kernel load_unaligned_zeropad() mechanism could cause a
* stray reference that can't be prevented by the caller, so Linux has
* specific code to handle this case. But when the #VC and #VE exceptions
* are routed to a paravisor, the specific code doesn't work. To avoid this
* problem, mark the pages as "not present" while the transition is in
* progress. If load_unaligned_zeropad() causes a stray reference, a normal
* page fault is generated instead of #VC or #VE, and the page-fault-based
* handlers for load_unaligned_zeropad() resolve the reference. When the
* transition is complete, hv_vtom_set_host_visibility() marks the pages
* as "present" again.
*/
static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
{
return !set_memory_np(kbuffer, pagecount);
}
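For orientation, here is a simplified, hypothetical sketch of the sequence these two hooks participate in when a driver calls set_memory_decrypted(); the real arch code has more steps and error handling, so treat this only as an illustration of the ordering the comment above relies on.
static int sketch_decrypt_pages(unsigned long addr, int numpages)
{
	/*
	 * Step 1: hv_vtom_clear_present() clears _PAGE_PRESENT so that a stray
	 * load_unaligned_zeropad() reference takes a normal page fault rather
	 * than #VC/#VE.
	 */
	if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, false))
		return -EIO;

	/* Step 2: the arch code updates the PTE encryption attributes here. */

	/*
	 * Step 3: hv_vtom_set_host_visibility() notifies the hypervisor and,
	 * via set_memory_p(), marks the range present again.
	 */
	if (!x86_platform.guest.enc_status_change_finish(addr, numpages, false))
		return -EIO;

	return 0;
}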
/*
* hv_vtom_set_host_visibility - Set specified memory visible to host.
*
@@ -515,16 +541,28 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
enum hv_mem_host_visibility visibility = enc ?
VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
u64 *pfn_array;
phys_addr_t paddr;
void *vaddr;
int ret = 0;
bool result = true;
int i, pfn;
pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
if (!pfn_array)
return false;
if (!pfn_array) {
result = false;
goto err_set_memory_p;
}
for (i = 0, pfn = 0; i < pagecount; i++) {
pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE);
/*
* Use slow_virt_to_phys() because the PRESENT bit has been
* temporarily cleared in the PTEs. slow_virt_to_phys() works
* without the PRESENT bit while virt_to_hvpfn() or similar
* does not.
*/
vaddr = (void *)kbuffer + (i * HV_HYP_PAGE_SIZE);
paddr = slow_virt_to_phys(vaddr);
pfn_array[pfn] = paddr >> HV_HYP_PAGE_SHIFT;
pfn++;
if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
@@ -538,14 +576,30 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
}
}
err_free_pfn_array:
kfree(pfn_array);
err_set_memory_p:
/*
* Set the PTE PRESENT bits again to revert what hv_vtom_clear_present()
* did. Do this even if there is an error earlier in this function in
* order to avoid leaving the memory range in a "broken" state. Setting
* the PRESENT bits shouldn't fail, but return an error if it does.
*/
if (set_memory_p(kbuffer, pagecount))
result = false;
return result;
}
static bool hv_vtom_tlb_flush_required(bool private)
{
return true;
/*
* Since hv_vtom_clear_present() marks the PTEs as "not present"
* and flushes the TLB, they can't be in the TLB. That makes the
* flush controlled by this function redundant, so return "false".
*/
return false;
}
static bool hv_vtom_cache_flush_required(void)
@@ -608,6 +662,7 @@ void __init hv_vtom_init(void)
x86_platform.hyper.is_private_mmio = hv_is_private_mmio;
x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required;
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
/* Set WB as the default cache mode. */
@@ -47,6 +47,7 @@ int set_memory_uc(unsigned long addr, int numpages);
int set_memory_wc(unsigned long addr, int numpages);
int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
int set_memory_p(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
@@ -755,10 +755,14 @@ pmd_t *lookup_pmd_address(unsigned long address)
* areas on 32-bit NUMA systems. The percpu areas can
* end up in this kind of memory, for instance.
*
* This could be optimized, but it is only intended to be
* used at initialization time, and keeping it
* unoptimized should increase the testing coverage for
* the more obscure platforms.
* Note that as long as the PTEs are well-formed with correct PFNs, this
* works without checking the PRESENT bit in the leaf PTE. This is unlike
* the similar vmalloc_to_page() and derivatives. Callers may depend on
* this behavior.
*
* This could be optimized, but it is only used in paths that are not perf
* sensitive, and keeping it unoptimized should increase the testing coverage
* for the more obscure platforms.
*/
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
@@ -2041,17 +2045,12 @@ int set_mce_nospec(unsigned long pfn)
return rc;
}
static int set_memory_p(unsigned long *addr, int numpages)
{
return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}
/* Restore full speculative operation to the pfn. */
int clear_mce_nospec(unsigned long pfn)
{
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
return set_memory_p(&addr, 1);
return set_memory_p(addr, 1);
}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
#endif /* CONFIG_X86_64 */
@@ -2104,6 +2103,11 @@ int set_memory_np_noalias(unsigned long addr, int numpages)
CPA_NO_CHECK_ALIAS, NULL);
}
int set_memory_p(unsigned long addr, int numpages)
{
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}
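A minimal usage sketch for the newly non-static set_memory_p(), mirroring the np/p pairing used by the Hyper-V code above (the function name here is hypothetical):
static int example_hide_then_restore(unsigned long addr, int numpages)
{
	int ret;

	ret = set_memory_np(addr, numpages);	/* clear _PAGE_PRESENT */
	if (ret)
		return ret;

	/* ... nothing may reference the range while it is not present ... */

	return set_memory_p(addr, numpages);	/* set _PAGE_PRESENT again */
}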
int set_memory_4k(unsigned long addr, int numpages)
{
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
@@ -322,21 +322,17 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer,
pagecount = hv_gpadl_size(type, size) >> HV_HYP_PAGE_SHIFT;
/* do we need a gpadl body msg */
pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
sizeof(struct vmbus_channel_gpadl_header) -
sizeof(struct gpa_range);
pfncount = pfnsize / sizeof(u64);
pfncount = umin(pagecount, pfnsize / sizeof(u64));
if (pagecount > pfncount) {
/* we need a gpadl body */
/* fill in the header */
msgsize = sizeof(struct vmbus_channel_msginfo) +
sizeof(struct vmbus_channel_gpadl_header) +
sizeof(struct gpa_range) + pfncount * sizeof(u64);
msgheader = kzalloc(msgsize, GFP_KERNEL);
if (!msgheader)
goto nomem;
return -ENOMEM;
INIT_LIST_HEAD(&msgheader->submsglist);
msgheader->msgsize = msgsize;
@@ -356,18 +352,17 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer,
pfnsum = pfncount;
pfnleft = pagecount - pfncount;
/* how many pfns can we fit */
/* how many pfns can we fit in a body message */
pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
sizeof(struct vmbus_channel_gpadl_body);
pfncount = pfnsize / sizeof(u64);
/* fill in the body */
/*
* If pfnleft is zero, everything fits in the header and no body
* messages are needed
*/
while (pfnleft) {
if (pfnleft > pfncount)
pfncurr = pfncount;
else
pfncurr = pfnleft;
pfncurr = umin(pfncount, pfnleft);
msgsize = sizeof(struct vmbus_channel_msginfo) +
sizeof(struct vmbus_channel_gpadl_body) +
pfncurr * sizeof(u64);
@@ -386,13 +381,12 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer,
list_del(&pos->msglistentry);
kfree(pos);
}
goto nomem;
kfree(msgheader);
return -ENOMEM;
}
msgbody->msgsize = msgsize;
gpadl_body =
(struct vmbus_channel_gpadl_body *)msgbody->msg;
gpadl_body = (struct vmbus_channel_gpadl_body *)msgbody->msg;
/*
* Gpadl is u32 and we are using a pointer which could
@@ -405,42 +399,12 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer,
kbuffer, size, send_offset, pfnsum + i);
/* add to msg header */
list_add_tail(&msgbody->msglistentry,
&msgheader->submsglist);
list_add_tail(&msgbody->msglistentry, &msgheader->submsglist);
pfnsum += pfncurr;
pfnleft -= pfncurr;
}
} else {
/* everything fits in a header */
msgsize = sizeof(struct vmbus_channel_msginfo) +
sizeof(struct vmbus_channel_gpadl_header) +
sizeof(struct gpa_range) + pagecount * sizeof(u64);
msgheader = kzalloc(msgsize, GFP_KERNEL);
if (msgheader == NULL)
goto nomem;
INIT_LIST_HEAD(&msgheader->submsglist);
msgheader->msgsize = msgsize;
gpadl_header = (struct vmbus_channel_gpadl_header *)
msgheader->msg;
gpadl_header->rangecount = 1;
gpadl_header->range_buflen = sizeof(struct gpa_range) +
pagecount * sizeof(u64);
gpadl_header->range[0].byte_offset = 0;
gpadl_header->range[0].byte_count = hv_gpadl_size(type, size);
for (i = 0; i < pagecount; i++)
gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn(
type, kbuffer, size, send_offset, i);
*msginfo = msgheader;
}
return 0;
nomem:
kfree(msgheader);
kfree(msgbody);
return -ENOMEM;
}
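To make the header/body split concrete, the standalone sketch below runs the same arithmetic with placeholder overhead values; MSG_MAX_BYTES, HDR_OVERHEAD and BODY_OVERHEAD are illustrative stand-ins, not the real MAX_SIZE_CHANNEL_MESSAGE or structure sizes.
#include <stdio.h>

#define MSG_MAX_BYTES	240	/* assumed maximum channel message payload */
#define HDR_OVERHEAD	72	/* gpadl_header + gpa_range, hypothetical */
#define BODY_OVERHEAD	8	/* gpadl_body, hypothetical */

int main(void)
{
	unsigned long pagecount = 64;	/* example GPADL size in pages */
	unsigned long hdr_pfns = (MSG_MAX_BYTES - HDR_OVERHEAD) / sizeof(unsigned long long);
	unsigned long body_pfns = (MSG_MAX_BYTES - BODY_OVERHEAD) / sizeof(unsigned long long);
	unsigned long in_header = pagecount < hdr_pfns ? pagecount : hdr_pfns;	/* umin() */
	unsigned long pfnleft = pagecount - in_header;
	unsigned long bodies = (pfnleft + body_pfns - 1) / body_pfns;

	printf("%lu PFNs in the header, %lu spread over %lu body message(s)\n",
	       in_header, pfnleft, bodies);
	return 0;
}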
/*
@@ -296,6 +296,11 @@ static struct {
spinlock_t lock;
} host_ts;
static bool timesync_implicit;
module_param(timesync_implicit, bool, 0644);
MODULE_PARM_DESC(timesync_implicit, "If set, treat SAMPLE as SYNC when clock is behind");
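Since this file is built into the hv_utils module (per the patch title above), the new knob should be reachable as hv_utils.timesync_implicit=1 on the kernel command line or through /sys/module/hv_utils/parameters/timesync_implicit at runtime; treat those exact paths as an assumption, as the diff itself only adds the parameter.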
static inline u64 reftime_to_ns(u64 reftime)
{
return (reftime - WLTIMEDELTA) * 100;
@@ -344,6 +349,29 @@ static void hv_set_host_time(struct work_struct *work)
do_settimeofday64(&ts);
}
/*
* Due to a bug on Hyper-V hosts, the sync flag may not always be sent on resume.
* Force a sync if the guest is behind.
*/
static inline bool hv_implicit_sync(u64 host_time)
{
struct timespec64 new_ts;
struct timespec64 threshold_ts;
new_ts = ns_to_timespec64(reftime_to_ns(host_time));
ktime_get_real_ts64(&threshold_ts);
threshold_ts.tv_sec += 5;
/*
* Return true if the guest is behind the host by 5 or more seconds.
*/
if (timespec64_compare(&new_ts, &threshold_ts) >= 0)
return true;
return false;
}
/*
* Synchronize time with host after reboot, restore, etc.
*
@@ -384,7 +412,8 @@ static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
spin_unlock_irqrestore(&host_ts.lock, flags);
/* Schedule work to do do_settimeofday64() */
if (adj_flags & ICTIMESYNCFLAG_SYNC)
if ((adj_flags & ICTIMESYNCFLAG_SYNC) ||
(timesync_implicit && hv_implicit_sync(host_ts.host_time)))
schedule_work(&adj_time_work);
}
@@ -988,7 +988,7 @@ static const struct dev_pm_ops vmbus_pm = {
};
/* The one and only one */
static struct bus_type hv_bus = {
static const struct bus_type hv_bus = {
.name = "vmbus",
.match = vmbus_match,
.shutdown = vmbus_shutdown,
@@ -1010,8 +1010,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
goto getmem_done;
}
pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n");
} else {
goto err1;
}
/*
@@ -164,8 +164,28 @@ struct hv_ring_buffer {
u8 buffer[];
} __packed;
/*
* If the requested ring buffer size is at least 8 times the size of the
* header, steal space from the ring buffer for the header. Otherwise, add
* space for the header so that it doesn't take too much of the ring buffer
* space.
*
* The factor of 8 is somewhat arbitrary. The goal is to prevent adding a
* relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring
* buffer size (such as 128 Kbytes) and so end up making a nearly twice as
* large allocation that will be almost half wasted. As a contrasting example,
* on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the
* header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring.
* In this latter case, we must add 64 Kbytes for the header and not worry
* about what's wasted.
*/
#define VMBUS_HEADER_ADJ(payload_sz) \
((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \
0 : sizeof(struct hv_ring_buffer))
/* Calculate the proper size of a ringbuffer, it must be page-aligned */
#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \
#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \
(payload_sz))
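A quick worked example of the macro, assuming the x86 numbers cited in the comment above (4 Kbyte header and 4 Kbyte pages):
/*
 * VMBUS_RING_SIZE(128 KiB): 128 KiB >= 8 * 4 KiB, so VMBUS_HEADER_ADJ is 0
 *                           and the result is PAGE_ALIGN(128 KiB) = 128 KiB
 *                           (the header is stolen from the payload).
 * VMBUS_RING_SIZE(16 KiB):  16 KiB < 8 * 4 KiB, so the 4 KiB header is added
 *                           and the result is PAGE_ALIGN(4 KiB + 16 KiB) = 20 KiB.
 */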
struct hv_ring_buffer_info {