Commit 678e90a3 authored by Peter Shier, committed by Paolo Bonzini

KVM: selftests: Test IPI to halted vCPU in xAPIC while backing page moves

When a guest is using xAPIC, KVM allocates a backing page for the required
EPT entry for the APIC access address set in the VMCS. If mm decides to
move that page, the KVM mmu notifier will update the VMCS with the new
HPA. This test induces a page move to test that APIC access continues to
work correctly. It is a directed test for
commit e649b3f0 "KVM: x86: Fix APIC page invalidation race".

Tested: ran for 1 hour on a skylake, migrating backing page every 1ms

Depends on patch "selftests: kvm: Add exception handling to selftests"
from aaronlewis@google.com that has not yet been queued.
Signed-off-by: Peter Shier <pshier@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Message-Id: <20201105223823.850068-1-pshier@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 1085a6b5
......@@ -24,6 +24,7 @@
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
/x86_64/xapic_ipi_test
/x86_64/xss_msr_test
/demand_paging_test
/dirty_log_test
......
......@@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
......
......@@ -406,8 +406,27 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
#define APIC_DEFAULT_GPA 0xfee00000ULL
/* APIC base address MSR and fields */
#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)
/* Set while the APIC is in x2APIC mode (xapic_ipi_test checks it before enabling xAPIC) */
#define MSR_IA32_APICBASE_EXTD (1<<10)
/* APIC enable bit in the MSR */
#define MSR_IA32_APICBASE_ENABLE (1<<11)
/*
 * NOTE(review): 0xfffff<<12 does not fit in a signed 32-bit int; an unsigned
 * constant may be intended -- confirm before relying on this mask.
 */
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
/* Clear the low 12 bits of x, leaving the 4 KiB-aligned base address */
#define GET_APIC_BASE(x) (((x) >> 12) << 12)
/* presumably the first MSR index used for x2APIC register access -- TODO confirm */
#define APIC_BASE_MSR 0x800
/* Same bit position as MSR_IA32_APICBASE_EXTD */
#define X2APIC_ENABLE (1UL << 10)
/*
 * APIC register offsets in bytes from the APIC base (xapic_ipi_test indexes a
 * uint32_t array with offset >> 2).
 */
#define APIC_ID 0x20
#define APIC_LVR 0x30
/* Extract bits 31:24 of x */
#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
#define APIC_TASKPRI 0x80
#define APIC_PROCPRI 0xA0
#define APIC_EOI 0xB0
/* Spurious interrupt vector register and its bits */
#define APIC_SPIV 0xF0
#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
/* APIC software enable bit; must be set in addition to the MSR enable bit */
#define APIC_SPIV_APIC_ENABLED (1 << 8)
/* Interrupt command register */
#define APIC_ICR 0x300
#define APIC_DEST_SELF 0x40000
#define APIC_DEST_ALLINC 0x80000
......@@ -432,6 +451,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
#define APIC_DM_EXTINT 0x00700
/* Low 8 bits */
#define APIC_VECTOR_MASK 0x000FF
/* Register offset; xapic_ipi_test writes the IPI destination here before writing APIC_ICR */
#define APIC_ICR2 0x310
/* Place x in bits 31:24 */
#define SET_APIC_DEST_FIELD(x) ((x) << 24)
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
......
// SPDX-License-Identifier: GPL-2.0
/*
* xapic_ipi_test
*
* Copyright (C) 2020, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
* Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
* another vCPU that is halted when KVM's backing page for the APIC access
* address has been moved by mm.
*
* The test starts two vCPUs: one that sends IPIs and one that continually
* executes HLT. The sender checks that the halter has woken from the HLT and
* has reentered HLT before sending the next IPI. While the vCPUs are running,
* the host continually calls migrate_pages to move all of the process' pages
* amongst the available numa nodes on the machine.
*
* Migration is a command line option. When used on non-numa machines the test
* will exit with an error. The test is still useful on non-numa for testing IPIs.
*/
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>
#include "kvm_util.h"
#include "numaif.h"
#include "processor.h"
#include "test_util.h"
#include "vmx.h"
/* Default running time for the test */
#define DEFAULT_RUN_SECS 3
/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000
/*
 * vCPU IDs. The halter vCPU is created together with the VM; the sender vCPU
 * is added afterwards.
 */
#define HALTER_VCPU_ID 0
#define SENDER_VCPU_ID 1
/*
 * Guest-side view of the xAPIC registers as an array of 32-bit words.
 * main() maps APIC_DEFAULT_GPA at the same guest virtual address.
 */
volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA;
/*
 * Vector for IPI from sender vCPU to halting vCPU.
 * Value is arbitrary and was chosen for the alternating bit pattern. Any
 * value should work.
 */
#define IPI_VECTOR 0xa5
/*
 * Incremented in the IPI handler. Provides evidence to the sender that the IPI
 * arrived at the destination. The host reads it through the address that
 * main() obtains with addr_gva2hva().
 */
static volatile uint64_t ipis_rcvd;
/* Data struct shared between host main thread and vCPUs */
struct test_data_page {
/* APIC ID of the halter vCPU; written by the halter, used by the sender as IPI destination */
uint32_t halter_apic_id;
/* Incremented by the halter just before each HLT */
volatile uint64_t hlt_count;
/* Incremented by the halter after each wake from HLT */
volatile uint64_t wake_count;
/* Incremented by the sender after each ICR write */
uint64_t ipis_sent;
/* migrate_pages() calls made by the host, and how many of them moved every page */
uint64_t migrations_attempted;
uint64_t migrations_completed;
/* ICR and ICR2 values the sender uses, recorded for the final report */
uint32_t icr;
uint32_t icr2;
/* Halter's task and processor priority registers, sampled before each HLT */
uint32_t halter_tpr;
uint32_t halter_ppr;
/*
 * Record local version register as a cross-check that APIC access
 * worked. Value should match what KVM reports (APIC_VERSION in
 * arch/x86/kvm/lapic.c). If test is failing, check that values match
 * to determine whether APIC access exits are working.
 */
uint32_t halter_lvr;
};
/* Per-thread arguments handed to vcpu_thread() */
struct thread_params {
/* Host pointer to the data page shared with the guest */
struct test_data_page *data;
struct kvm_vm *vm;
/* ID of the vCPU this thread runs */
uint32_t vcpu_id;
uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};
uint32_t read_apic_reg(uint reg)
{
return apic_base[reg >> 2];
}
void write_apic_reg(uint reg, uint32_t val)
{
apic_base[reg >> 2] = val;
}
/*
 * Clear both the APIC enable bit and the x2APIC (EXTD) bit in
 * MSR_IA32_APICBASE, leaving every other bit as it was.
 */
void disable_apic(void)
{
	uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);

	apicbase &= ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
	wrmsr(MSR_IA32_APICBASE, apicbase);
}
/*
 * Put the local APIC into xAPIC mode and software-enable it.
 *
 * Three starting states are handled: x2APIC (disable first, then enable),
 * disabled (just set the enable bit), and already-enabled xAPIC (MSR left
 * untouched). In every case the software enable bit in the spurious
 * interrupt vector register is then set.
 */
void enable_xapic(void)
{
	uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
	uint32_t spiv;

	if (!(apicbase & MSR_IA32_APICBASE_EXTD)) {
		if (!(apicbase & MSR_IA32_APICBASE_ENABLE))
			wrmsr(MSR_IA32_APICBASE,
			      apicbase | MSR_IA32_APICBASE_ENABLE);
	} else {
		/*
		 * Per SDM: going from x2APIC to xAPIC requires passing
		 * through the disabled state first.
		 */
		disable_apic();
		apicbase = rdmsr(MSR_IA32_APICBASE);
		wrmsr(MSR_IA32_APICBASE, apicbase | MSR_IA32_APICBASE_ENABLE);
	}

	/*
	 * Per SDM: the spurious interrupt vector register resets with the
	 * APIC software enable bit clear, so it has to be set here as well as
	 * the enable bit in the MSR.
	 */
	spiv = read_apic_reg(APIC_SPIV);
	write_apic_reg(APIC_SPIV, spiv | APIC_SPIV_APIC_ENABLED);
}
/*
 * Guest-side sanity check: the base address held in MSR_IA32_APICBASE must be
 * APIC_DEFAULT_GPA, the address apic_base points at.
 */
void verify_apic_base_addr(void)
{
	uint64_t base = GET_APIC_BASE(rdmsr(MSR_IA32_APICBASE));

	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}
/*
 * Guest entry point for the halter vCPU.
 *
 * @data: page shared with the host and the sender vCPU.
 *
 * Enables xAPIC, publishes this vCPU's APIC ID and local version register in
 * @data, then halts forever, counting each halt and each wake.
 */
static void halter_guest_code(struct test_data_page *data)
{
	uint32_t id_reg;

	verify_apic_base_addr();
	enable_xapic();

	id_reg = read_apic_reg(APIC_ID);
	data->halter_apic_id = GET_APIC_ID_FIELD(id_reg);
	data->halter_lvr = read_apic_reg(APIC_LVR);

	/*
	 * Halt, wake, repeat. Interrupts are only enabled around the HLT
	 * itself, which keeps the window between announcing the upcoming halt
	 * (hlt_count) and actually halting as small as possible. The first
	 * pass needs no CLI: this vCPU starts first and the host waits for
	 * its first halt before starting the sender. TPR and PPR are sampled
	 * purely as diagnostics for a failing run.
	 */
	while (1) {
		data->halter_tpr = read_apic_reg(APIC_TASKPRI);
		data->halter_ppr = read_apic_reg(APIC_PROCPRI);
		data->hlt_count++;
		asm volatile("sti; hlt; cli");
		data->wake_count++;
	}
}
/*
 * Interrupt handler for IPI_VECTOR on the halter vCPU: count the IPI and
 * signal end-of-interrupt. The EOI write uses an arbitrary non-zero value so
 * that stray writes landing in the APIC access backing page can be recognised
 * when diagnosing a test failure.
 */
static void guest_ipi_handler(struct ex_regs *regs)
{
	const uint32_t eoi_marker = 77;

	ipis_rcvd++;
	write_apic_reg(APIC_EOI, eoi_marker);
}
/*
 * Guest entry point for the sender vCPU.
 *
 * @data: page shared with the host and the halter vCPU.
 *
 * Sends IPI_VECTOR to the halter in an endless loop. After each IPI it waits
 * for the halter to report that it received the IPI, woke up and halted
 * again, and raises a guest assertion if that does not happen in time.
 */
static void sender_guest_code(struct test_data_page *data)
{
	uint64_t last_wake_count;
	uint64_t last_hlt_count;
	uint64_t last_ipis_rcvd_count;
	uint32_t icr_low;
	uint32_t icr_high;
	uint64_t wait_start;

	verify_apic_base_addr();
	enable_xapic();

	/*
	 * Build the interrupt command register values once, up front.
	 *
	 * Delivery mode is fixed, which per SDM "Delivers the interrupt
	 * specified in the vector field to the target processor."
	 *
	 * Destination mode is physical: the target is named by its local
	 * APIC ID. The halter vCPU is assumed to have started already and to
	 * have stored that ID in data->halter_apic_id.
	 */
	icr_low = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
	icr_high = SET_APIC_DEST_FIELD(data->halter_apic_id);
	data->icr = icr_low;
	data->icr2 = icr_high;

	/* Baseline counters against which progress is measured */
	last_wake_count = data->wake_count;
	last_hlt_count = data->hlt_count;
	last_ipis_rcvd_count = ipis_rcvd;

	while (1) {
		/*
		 * Destination goes into ICR2 first, then the ICR write.
		 * The very first IPI needs no handshake because the halter
		 * vCPU starts earlier.
		 */
		write_apic_reg(APIC_ICR2, icr_high);
		write_apic_reg(APIC_ICR, icr_low);
		data->ipis_sent++;

		/*
		 * Poll for up to ~1 sec until the halter has received the
		 * IPI, woken from the halt and gone back into halt. Current
		 * CPUs typically run at 2.x GHz, i.e. ~2 billion TSC ticks
		 * per second.
		 */
		wait_start = rdtsc();
		for (;;) {
			if (rdtsc() - wait_start >= 2000000000)
				break;
			if (ipis_rcvd == last_ipis_rcvd_count ||
			    data->wake_count == last_wake_count ||
			    data->hlt_count == last_hlt_count)
				continue;
			break;
		}

		GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
			     (data->wake_count != last_wake_count) &&
			     (data->hlt_count != last_hlt_count));

		last_wake_count = data->wake_count;
		last_hlt_count = data->hlt_count;
		last_ipis_rcvd_count = ipis_rcvd;
	}
}
static void *vcpu_thread(void *arg)
{
struct thread_params *params = (struct thread_params *)arg;
struct ucall uc;
int old;
int r;
unsigned int exit_reason;
r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
TEST_ASSERT(r == 0,
"pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
params->vcpu_id, r);
fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id);
vcpu_run(params->vm, params->vcpu_id);
exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;
TEST_ASSERT(exit_reason == KVM_EXIT_IO,
"vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
params->vcpu_id, exit_reason, exit_reason_str(exit_reason));
if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) {
TEST_ASSERT(false,
"vCPU %u exited with error: %s.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
"Halter TPR=%#x PPR=%#x LVR=%#x\n"
"Migrations attempted: %lu\n"
"Migrations completed: %lu\n",
params->vcpu_id, (const char *)uc.args[0],
params->data->ipis_sent, params->data->hlt_count,
params->data->wake_count,
*params->pipis_rcvd, params->data->halter_tpr,
params->data->halter_ppr, params->data->halter_lvr,
params->data->migrations_attempted,
params->data->migrations_completed);
}
return NULL;
}
/*
 * Cancel a vCPU thread and wait for it to terminate.
 *
 * @thread:  thread to stop.
 * @vcpu_id: ID of the vCPU the thread runs; used only in failure messages.
 *
 * Fails the test if cancelling or joining fails, or if the thread ended in
 * any way other than being cancelled. pthread_cancel() and pthread_join()
 * return the error number directly, so that return value is what gets
 * reported. vcpu_id is unsigned and is printed with %u.
 */
static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
{
	void *retval;
	int r;

	r = pthread_cancel(thread);
	TEST_ASSERT(r == 0,
		    "pthread_cancel on vcpu_id=%u failed with errno=%d",
		    vcpu_id, r);

	r = pthread_join(thread, &retval);
	TEST_ASSERT(r == 0,
		    "pthread_join on vcpu_id=%u failed with errno=%d",
		    vcpu_id, r);
	TEST_ASSERT(retval == PTHREAD_CANCELED,
		    "expected retval=%p, got %p", PTHREAD_CANCELED,
		    retval);
}
/*
 * Migrate this process' pages between NUMA nodes while the vCPUs run.
 *
 * @data:        page shared with the guest; migration counters are updated
 *               here and guest progress counters are read from it.
 * @run_secs:    how long to keep migrating, in seconds.
 * @delay_usecs: pause between migrate_pages() calls, in microseconds.
 * @pipis_rcvd:  host address of the guest's ipis_rcvd counter, for reporting.
 *
 * Fails the test if fewer than two NUMA nodes are available, or if the IPI,
 * HLT and wake counters have not all advanced between two progress reports.
 */
void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
		   uint64_t *pipis_rcvd)
{
	long pages_not_moved;
	unsigned long nodemask = 0;
	unsigned long nodemasks[sizeof(nodemask) * 8];
	/*
	 * get_mempolicy() and migrate_pages() take the mask size as a number
	 * of bits, not bytes.
	 */
	const unsigned long maxnode = sizeof(nodemask) * 8;
	int nodes = 0;
	time_t start_time, last_update, now;
	time_t interval_secs = 1;
	unsigned long i;
	int r;
	int from, to;
	unsigned long bit;
	uint64_t hlt_count;
	uint64_t wake_count;
	uint64_t ipis_sent;

	fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
		delay_usecs);

	/* Get set of first 64 numa nodes available */
	r = get_mempolicy(NULL, &nodemask, maxnode,
			  0, MPOL_F_MEMS_ALLOWED);
	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);

	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
		"(each 1-bit indicates node is present): %#lx\n",
		maxnode, nodemask);

	/*
	 * Init array of masks containing a single-bit in each, one for each
	 * available node. migrate_pages called below requires specifying nodes
	 * as bit masks.
	 */
	for (i = 0, bit = 1; i < maxnode; i++, bit <<= 1) {
		if (nodemask & bit) {
			nodemasks[nodes] = nodemask & bit;
			nodes++;
		}
	}

	TEST_ASSERT(nodes > 1,
		    "Did not find at least 2 numa nodes. Can't do migration\n");

	fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);

	from = 0;
	to = 1;
	start_time = time(NULL);
	last_update = start_time;

	/* Baseline for the per-interval progress check */
	ipis_sent = data->ipis_sent;
	hlt_count = data->hlt_count;
	wake_count = data->wake_count;

	while ((int)(time(NULL) - start_time) < run_secs) {
		data->migrations_attempted++;

		/*
		 * migrate_pages with PID=0 will migrate all pages of this
		 * process between the nodes specified as bitmasks. The page
		 * backing the APIC access address belongs to this process
		 * because it is allocated by KVM in the context of the
		 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
		 * test may break or give a false positive signal.
		 *
		 * maxnode is passed in bits, matching the get_mempolicy()
		 * call above, so that no node found there is cut off.
		 */
		pages_not_moved = migrate_pages(0, maxnode,
						&nodemasks[from],
						&nodemasks[to]);
		if (pages_not_moved < 0)
			fprintf(stderr,
				"migrate_pages failed, errno=%d\n", errno);
		else if (pages_not_moved > 0)
			fprintf(stderr,
				"migrate_pages could not move %ld pages\n",
				pages_not_moved);
		else
			data->migrations_completed++;

		/* Next round moves on from the node just migrated to */
		from = to;
		to++;
		if (to == nodes)
			to = 0;

		now = time(NULL);
		if (((now - start_time) % interval_secs == 0) &&
		    (now != last_update)) {
			last_update = now;
			fprintf(stderr,
				"%lu seconds: Migrations attempted=%lu completed=%lu, "
				"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
				(unsigned long)(now - start_time),
				data->migrations_attempted,
				data->migrations_completed,
				data->ipis_sent, *pipis_rcvd,
				data->hlt_count, data->wake_count);

			TEST_ASSERT(ipis_sent != data->ipis_sent &&
				    hlt_count != data->hlt_count &&
				    wake_count != data->wake_count,
				    "IPI, HLT and wake count have not increased "
				    "in the last %lu seconds. "
				    "HLTer is likely hung.\n",
				    (unsigned long)interval_secs);

			ipis_sent = data->ipis_sent;
			hlt_count = data->hlt_count;
			wake_count = data->wake_count;
		}
		usleep(delay_usecs);
	}
}
/*
 * Parse the command line.
 *
 * @argc, @argv:  as passed to main().
 * @run_secs:     set from "-s <value>".
 * @migrate:      set to true by "-m".
 * @delay_usecs:  set from "-d <value>".
 *
 * Outputs are only written for options that are present. Any unrecognised
 * option fails the test with the usage text.
 */
void get_cmdline_args(int argc, char *argv[], int *run_secs,
		      bool *migrate, int *delay_usecs)
{
	int opt;

	while ((opt = getopt(argc, argv, "s:d:m")) != -1) {
		if (opt == 's') {
			*run_secs = parse_size(optarg);
		} else if (opt == 'm') {
			*migrate = true;
		} else if (opt == 'd') {
			*delay_usecs = parse_size(optarg);
		} else {
			TEST_ASSERT(false,
				    "Usage: -s <runtime seconds>. Default is %d seconds.\n"
				    "-m adds calls to migrate_pages while vCPUs are running."
				    " Default is no migrations.\n"
				    "-d <delay microseconds> - delay between migrate_pages() calls."
				    " Default is %d microseconds.\n",
				    DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
		}
	}
}
int main(int argc, char *argv[])
{
int r;
int wait_secs;
const int max_halter_wait = 10;
int run_secs = 0;
int delay_usecs = 0;
struct test_data_page *data;
vm_vaddr_t test_data_page_vaddr;
bool migrate = false;
pthread_t threads[2];
struct thread_params params[2];
struct kvm_vm *vm;
uint64_t *pipis_rcvd;
get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
if (run_secs <= 0)
run_secs = DEFAULT_RUN_SECS;
if (delay_usecs <= 0)
delay_usecs = DEFAULT_DELAY_USECS;
vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code);
params[0].vm = vm;
params[1].vm = vm;
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0);
data =
(struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
memset(data, 0, sizeof(*data));
params[0].data = data;
params[1].data = data;
vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr);
vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr);
pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
params[0].pipis_rcvd = pipis_rcvd;
params[1].pipis_rcvd = pipis_rcvd;
/* Start halter vCPU thread and wait for it to execute first HLT. */
params[0].vcpu_id = HALTER_VCPU_ID;
r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
TEST_ASSERT(r == 0,
"pthread_create halter failed errno=%d", errno);
fprintf(stderr, "Halter vCPU thread started\n");
wait_secs = 0;
while ((wait_secs < max_halter_wait) && !data->hlt_count) {
sleep(1);
wait_secs++;
}
TEST_ASSERT(data->hlt_count,
"Halter vCPU did not execute first HLT within %d seconds",
max_halter_wait);
fprintf(stderr,
"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
data->halter_apic_id, wait_secs);
params[1].vcpu_id = SENDER_VCPU_ID;
r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
fprintf(stderr,
"IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
run_secs);
if (!migrate)
sleep(run_secs);
else
do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
/*
* Cancel threads and wait for them to stop.
*/
cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID);
cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID);
fprintf(stderr,
"Test successful after running for %d seconds.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
"Halter APIC ID=%#x\n"
"Sender ICR value=%#x ICR2 value=%#x\n"
"Halter TPR=%#x PPR=%#x LVR=%#x\n"
"Migrations attempted: %lu\n"
"Migrations completed: %lu\n",
run_secs, data->ipis_sent,
data->hlt_count, data->wake_count, *pipis_rcvd,
data->halter_apic_id,
data->icr, data->icr2,
data->halter_tpr, data->halter_ppr, data->halter_lvr,
data->migrations_attempted, data->migrations_completed);
kvm_vm_free(vm);
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment