Commit 783e9e51 authored by Paolo Bonzini

kvm: selftests: add API testing infrastructure

Testsuite contributed by Google and cleaned up by myself for
inclusion in Linux.
Signed-off-by: Ken Hofsass <hofsass@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 3140c156
@@ -14,6 +14,7 @@ TARGETS += gpio
TARGETS += intel_pstate
TARGETS += ipc
TARGETS += kcmp
TARGETS += kvm
TARGETS += lib
TARGETS += membarrier
TARGETS += memfd
all:
top_srcdir = ../../../../
UNAME_M := $(shell uname -m)
LIBKVM = lib/assert.c lib/kvm_util.c lib/sparsebit.c
LIBKVM_x86_64 = lib/x86.c
TEST_GEN_PROGS_x86_64 = set_sregs_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)
# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
include ../lib.mk
STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
$(AR) crs $@ $^
$(LINUX_HDR_PATH):
make -C $(top_srcdir) headers_install
all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
$(TEST_GEN_PROGS): $(STATIC_LIBS)
$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
/*
* tools/testing/selftests/kvm/include/kvm_util.h
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
*/
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H 1
#include "test_util.h"
#include "asm/kvm.h"
#include "linux/kvm.h"
#include <sys/ioctl.h>
#include "sparsebit.h"
/*
* Memslots can't cover the gfn starting at this gpa; otherwise vCPUs can't be
* created. Only applies to VMs using EPT.
*/
#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
/* Callers of kvm_util only have an incomplete/opaque description of the
* structure kvm_util is using to maintain the state of a VM.
*/
struct kvm_vm;
typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000
#define DEFAULT_GUEST_PHY_PAGES 512
#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
#define DEFAULT_STACK_PGS 5
enum vm_guest_mode {
VM_MODE_FLAT48PG,
};
enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS,
VM_MEM_SRC_ANONYMOUS_THP,
VM_MEM_SRC_ANONYMOUS_HUGETLB,
};
int kvm_check_cap(long cap);
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);
int kvm_memcmp_hva_gva(void *hva,
struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
void vcpu_dump(FILE *stream, struct kvm_vm *vm,
uint32_t vcpuid, uint8_t indent);
void vm_create_irqchip(struct kvm_vm *vm);
void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
uint64_t guest_paddr, uint32_t slot, uint64_t npages,
uint32_t flags);
void vcpu_ioctl(struct kvm_vm *vm,
uint32_t vcpuid, unsigned long ioctl, void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
uint32_t data_memslot, uint32_t pgd_memslot);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_mp_state *mp_state);
void vcpu_regs_get(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_regs_set(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
void vcpu_sregs_get(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_sregs *sregs);
void vcpu_sregs_set(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_sregs *sregs);
int _vcpu_sregs_set(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_sregs *sregs);
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events);
void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events);
const char *exit_reason_str(unsigned int exit_reason);
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint32_t pgd_memslot);
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
vm_paddr_t paddr_min, uint32_t memslot);
void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid);
void vcpu_set_cpuid(
struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
struct kvm_cpuid2 *allocate_kvm_cpuid2(void);
struct kvm_cpuid_entry2 *
find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
uint32_t index);
static inline struct kvm_cpuid_entry2 *
find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function)
{
return find_cpuid_index_entry(cpuid, function, 0);
}
struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
uint64_t end);
struct kvm_dirty_log *
allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
#endif /* SELFTEST_KVM_UTIL_H */
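/*
 * Illustrative sketch, not part of the patch: one way a selftest might drive
 * the kvm_util API declared above. Every function called here is declared in
 * this header; VCPU_ID and guest_code are hypothetical names picked for the
 * example, and the test has to be linked against libkvm.a from the Makefile.
 */
#include <stdio.h>
#include "kvm_util.h"

#define VCPU_ID 5

static void guest_code(void)
{
	for (;;)
		;	/* the guest side of the test would run here */
}

int main(void)
{
	struct kvm_vm *vm;
	struct kvm_sregs sregs;

	/* Create a VM with the default memory layout and one vCPU. */
	vm = vm_create_default(VCPU_ID, guest_code);

	/* Round-trip the vCPU special registers through KVM. */
	vcpu_sregs_get(vm, VCPU_ID, &sregs);
	vcpu_sregs_set(vm, VCPU_ID, &sregs);

	/* Enter the guest once and report why it exited. */
	vcpu_run(vm, VCPU_ID);
	printf("exit reason: %s\n",
	       exit_reason_str(vcpu_state(vm, VCPU_ID)->exit_reason));

	kvm_vm_free(vm);
	return 0;
}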
/*
* tools/testing/selftests/kvm/include/sparsebit.h
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
*
* Header file that describes API to the sparsebit library.
* This library provides a memory efficient means of storing
* the settings of bits indexed via a uint64_t. Memory usage
* is reasonable, significantly less than (2^64 / 8) bytes, as
* long as bits that are mostly set or mostly cleared are close
* to each other. This library is efficient in memory usage
* even in the case where most bits are set.
*/
#ifndef _TEST_SPARSEBIT_H_
#define _TEST_SPARSEBIT_H_
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
struct sparsebit;
typedef uint64_t sparsebit_idx_t;
typedef uint64_t sparsebit_num_t;
struct sparsebit *sparsebit_alloc(void);
void sparsebit_free(struct sparsebit **sbitp);
void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
bool sparsebit_is_set_num(struct sparsebit *sbit,
sparsebit_idx_t idx, sparsebit_num_t num);
bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
bool sparsebit_is_clear_num(struct sparsebit *sbit,
sparsebit_idx_t idx, sparsebit_num_t num);
sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
bool sparsebit_any_set(struct sparsebit *sbit);
bool sparsebit_any_clear(struct sparsebit *sbit);
bool sparsebit_all_set(struct sparsebit *sbit);
bool sparsebit_all_clear(struct sparsebit *sbit);
sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
sparsebit_idx_t start, sparsebit_num_t num);
sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
sparsebit_idx_t start, sparsebit_num_t num);
void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
sparsebit_num_t num);
void sparsebit_set_all(struct sparsebit *sbitp);
void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
void sparsebit_clear_num(struct sparsebit *sbitp,
sparsebit_idx_t start, sparsebit_num_t num);
void sparsebit_clear_all(struct sparsebit *sbitp);
void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
unsigned int indent);
void sparsebit_validate_internal(struct sparsebit *sbit);
#ifdef __cplusplus
}
#endif
#endif /* _TEST_SPARSEBIT_H_ */
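/*
 * Illustrative sketch, not part of the patch: minimal use of the sparsebit
 * API declared above, here tracking a set of page-frame indices. All calls
 * are declared in this header; the function name is hypothetical.
 */
#include <assert.h>
#include "sparsebit.h"

static void sparsebit_example(void)
{
	struct sparsebit *pages = sparsebit_alloc();

	/* Mark 16 consecutive indices starting at 0x1000 as set. */
	sparsebit_set_num(pages, 0x1000, 16);
	assert(sparsebit_is_set(pages, 0x100f));
	assert(sparsebit_is_clear(pages, 0x1010));

	/* First clear index after 0x1000 is just past the set range. */
	assert(sparsebit_next_clear(pages, 0x1000) == 0x1010);

	/* Free all nodes and NULL out the caller's pointer. */
	sparsebit_free(&pages);
}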
/*
* tools/testing/selftests/kvm/include/test_util.h
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
*/
#ifndef TEST_UTIL_H
#define TEST_UTIL_H 1
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
ssize_t test_write(int fd, const void *buf, size_t count);
ssize_t test_read(int fd, void *buf, size_t count);
int test_seq_read(const char *path, char **bufp, size_t *sizep);
void test_assert(bool exp, const char *exp_str,
const char *file, unsigned int line, const char *fmt, ...);
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
#define TEST_ASSERT(e, fmt, ...) \
test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
#define ASSERT_EQ(a, b) do { \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
TEST_ASSERT(__a == __b, \
"ASSERT_EQ(%s, %s) failed.\n" \
"\t%s is %#lx\n" \
"\t%s is %#lx", \
#a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
} while (0)
#endif /* TEST_UTIL_H */
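/*
 * Illustrative sketch, not part of the patch: intended use of the assertion
 * macros above. test_util.h already pulls in fcntl.h, unistd.h and errno.h,
 * so the example only needs this header. On failure, test_assert() prints
 * the file, line, expression, a backtrace and the message, then exits.
 */
#include "test_util.h"

static void test_util_example(void)
{
	int fd = open("/dev/kvm", O_RDONLY);

	/* The printf-style arguments add context to the failure report. */
	TEST_ASSERT(fd >= 0, "open /dev/kvm failed, errno: %i", errno);

	/* ASSERT_EQ reports both values when they differ. */
	ASSERT_EQ(close(fd), 0);
}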
/*
* tools/testing/selftests/kvm/include/x86.h
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
*/
#ifndef SELFTEST_KVM_X86_H
#define SELFTEST_KVM_X86_H
#include <assert.h>
#include <stdint.h>
#define X86_EFLAGS_FIXED (1u << 1)
#define X86_CR4_VME (1ul << 0)
#define X86_CR4_PVI (1ul << 1)
#define X86_CR4_TSD (1ul << 2)
#define X86_CR4_DE (1ul << 3)
#define X86_CR4_PSE (1ul << 4)
#define X86_CR4_PAE (1ul << 5)
#define X86_CR4_MCE (1ul << 6)
#define X86_CR4_PGE (1ul << 7)
#define X86_CR4_PCE (1ul << 8)
#define X86_CR4_OSFXSR (1ul << 9)
#define X86_CR4_OSXMMEXCPT (1ul << 10)
#define X86_CR4_UMIP (1ul << 11)
#define X86_CR4_VMXE (1ul << 13)
#define X86_CR4_SMXE (1ul << 14)
#define X86_CR4_FSGSBASE (1ul << 16)
#define X86_CR4_PCIDE (1ul << 17)
#define X86_CR4_OSXSAVE (1ul << 18)
#define X86_CR4_SMEP (1ul << 20)
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)
/* The enum values match the instruction encoding of each register */
enum x86_register {
RAX = 0,
RCX,
RDX,
RBX,
RSP,
RBP,
RSI,
RDI,
R8,
R9,
R10,
R11,
R12,
R13,
R14,
R15,
};
struct desc64 {
uint16_t limit0;
uint16_t base0;
unsigned base1:8, type:5, dpl:2, p:1;
unsigned limit1:4, zero0:3, g:1, base2:8;
uint32_t base3;
uint32_t zero1;
} __attribute__((packed));
struct desc_ptr {
uint16_t size;
uint64_t address;
} __attribute__((packed));
static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
return ((uint64_t)desc->base3 << 32) |
(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
}
static inline uint64_t rdtsc(void)
{
uint32_t eax, edx;
/*
* The lfence is to wait (on Intel CPUs) until all previous
* instructions have been executed.
*/
__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
return ((uint64_t)edx) << 32 | eax;
}
static inline uint64_t rdtscp(uint32_t *aux)
{
uint32_t eax, edx;
__asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
return ((uint64_t)edx) << 32 | eax;
}
static inline uint64_t rdmsr(uint32_t msr)
{
uint32_t a, d;
__asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
return a | ((uint64_t) d << 32);
}
static inline void wrmsr(uint32_t msr, uint64_t value)
{
uint32_t a = value;
uint32_t d = value >> 32;
__asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
}
static inline uint16_t inw(uint16_t port)
{
uint16_t tmp;
__asm__ __volatile__("in %%dx, %%ax"
: /* output */ "=a" (tmp)
: /* input */ "d" (port));
return tmp;
}
static inline uint16_t get_es(void)
{
uint16_t es;
__asm__ __volatile__("mov %%es, %[es]"
: /* output */ [es]"=rm"(es));
return es;
}
static inline uint16_t get_cs(void)
{
uint16_t cs;
__asm__ __volatile__("mov %%cs, %[cs]"
: /* output */ [cs]"=rm"(cs));
return cs;
}
static inline uint16_t get_ss(void)
{
uint16_t ss;
__asm__ __volatile__("mov %%ss, %[ss]"
: /* output */ [ss]"=rm"(ss));
return ss;
}
static inline uint16_t get_ds(void)
{
uint16_t ds;
__asm__ __volatile__("mov %%ds, %[ds]"
: /* output */ [ds]"=rm"(ds));
return ds;
}
static inline uint16_t get_fs(void)
{
uint16_t fs;
__asm__ __volatile__("mov %%fs, %[fs]"
: /* output */ [fs]"=rm"(fs));
return fs;
}
static inline uint16_t get_gs(void)
{
uint16_t gs;
__asm__ __volatile__("mov %%gs, %[gs]"
: /* output */ [gs]"=rm"(gs));
return gs;
}
static inline uint16_t get_tr(void)
{
uint16_t tr;
__asm__ __volatile__("str %[tr]"
: /* output */ [tr]"=rm"(tr));
return tr;
}
static inline uint64_t get_cr0(void)
{
uint64_t cr0;
__asm__ __volatile__("mov %%cr0, %[cr0]"
: /* output */ [cr0]"=r"(cr0));
return cr0;
}
static inline uint64_t get_cr3(void)
{
uint64_t cr3;
__asm__ __volatile__("mov %%cr3, %[cr3]"
: /* output */ [cr3]"=r"(cr3));
return cr3;
}
static inline uint64_t get_cr4(void)
{
uint64_t cr4;
__asm__ __volatile__("mov %%cr4, %[cr4]"
: /* output */ [cr4]"=r"(cr4));
return cr4;
}
static inline void set_cr4(uint64_t val)
{
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
static inline uint64_t get_gdt_base(void)
{
struct desc_ptr gdt;
__asm__ __volatile__("sgdt %[gdt]"
: /* output */ [gdt]"=m"(gdt));
return gdt.address;
}
static inline uint64_t get_idt_base(void)
{
struct desc_ptr idt;
__asm__ __volatile__("sidt %[idt]"
: /* output */ [idt]"=m"(idt));
return idt.address;
}
#define SET_XMM(__var, __xmm) \
asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
static inline void set_xmm(int n, unsigned long val)
{
switch (n) {
case 0:
SET_XMM(val, xmm0);
break;
case 1:
SET_XMM(val, xmm1);
break;
case 2:
SET_XMM(val, xmm2);
break;
case 3:
SET_XMM(val, xmm3);
break;
case 4:
SET_XMM(val, xmm4);
break;
case 5:
SET_XMM(val, xmm5);
break;
case 6:
SET_XMM(val, xmm6);
break;
case 7:
SET_XMM(val, xmm7);
break;
}
}
typedef unsigned long v1di __attribute__ ((vector_size (8)));
static inline unsigned long get_xmm(int n)
{
assert(n >= 0 && n <= 7);
register v1di xmm0 __asm__("%xmm0");
register v1di xmm1 __asm__("%xmm1");
register v1di xmm2 __asm__("%xmm2");
register v1di xmm3 __asm__("%xmm3");
register v1di xmm4 __asm__("%xmm4");
register v1di xmm5 __asm__("%xmm5");
register v1di xmm6 __asm__("%xmm6");
register v1di xmm7 __asm__("%xmm7");
switch (n) {
case 0:
return (unsigned long)xmm0;
case 1:
return (unsigned long)xmm1;
case 2:
return (unsigned long)xmm2;
case 3:
return (unsigned long)xmm3;
case 4:
return (unsigned long)xmm4;
case 5:
return (unsigned long)xmm5;
case 6:
return (unsigned long)xmm6;
case 7:
return (unsigned long)xmm7;
}
return 0;
}
/*
* Basic CPU control in CR0
*/
#define X86_CR0_PE (1UL<<0) /* Protection Enable */
#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
#define X86_CR0_EM (1UL<<2) /* Emulation */
#define X86_CR0_TS (1UL<<3) /* Task Switched */
#define X86_CR0_ET (1UL<<4) /* Extension Type */
#define X86_CR0_NE (1UL<<5) /* Numeric Error */
#define X86_CR0_WP (1UL<<16) /* Write Protect */
#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
#define X86_CR0_NW (1UL<<29) /* Not Write-through */
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
/*
* CPU model specific register (MSR) numbers.
*/
/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
/* EFER bits: */
#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */
#define EFER_LME (1<<8) /* Long mode enable */
#define EFER_LMA (1<<10) /* Long mode active (read-only) */
#define EFER_NX (1<<11) /* No execute enable */
#define EFER_SVME (1<<12) /* Enable virtualization */
#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */
#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */
/* Intel MSRs. Some also available on other CPUs */
#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
#define MSR_IA32_MCG_CAP 0x00000179
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
#define MSR_IA32_MCG_EXT_CTL 0x000004d0
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
#define MSR_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
#define MSR_TURBO_RATIO_LIMIT2 0x000001af
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060
#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
#define LBR_INFO_CYCLES 0xffff
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
#define MSR_IA32_RTIT_CTL 0x00000570
#define MSR_IA32_RTIT_STATUS 0x00000571
#define MSR_IA32_RTIT_ADDR0_A 0x00000580
#define MSR_IA32_RTIT_ADDR0_B 0x00000581
#define MSR_IA32_RTIT_ADDR1_A 0x00000582
#define MSR_IA32_RTIT_ADDR1_B 0x00000583
#define MSR_IA32_RTIT_ADDR2_A 0x00000584
#define MSR_IA32_RTIT_ADDR2_B 0x00000585
#define MSR_IA32_RTIT_ADDR3_A 0x00000586
#define MSR_IA32_RTIT_ADDR3_B 0x00000587
#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
#define MSR_MTRRfix64K_00000 0x00000250
#define MSR_MTRRfix16K_80000 0x00000258
#define MSR_MTRRfix16K_A0000 0x00000259
#define MSR_MTRRfix4K_C0000 0x00000268
#define MSR_MTRRfix4K_C8000 0x00000269
#define MSR_MTRRfix4K_D0000 0x0000026a
#define MSR_MTRRfix4K_D8000 0x0000026b
#define MSR_MTRRfix4K_E0000 0x0000026c
#define MSR_MTRRfix4K_E8000 0x0000026d
#define MSR_MTRRfix4K_F0000 0x0000026e
#define MSR_MTRRfix4K_F8000 0x0000026f
#define MSR_MTRRdefType 0x000002ff
#define MSR_IA32_CR_PAT 0x00000277
#define MSR_IA32_DEBUGCTLMSR 0x000001d9
#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
#define DEBUGCTLMSR_BTINT (1UL << 8)
#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
#define MSR_PEBS_FRONTEND 0x000003f7
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
/* C-state Residency Counters */
#define MSR_PKG_C3_RESIDENCY 0x000003f8
#define MSR_PKG_C6_RESIDENCY 0x000003f9
#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
#define MSR_PKG_C7_RESIDENCY 0x000003fa
#define MSR_CORE_C3_RESIDENCY 0x000003fc
#define MSR_CORE_C6_RESIDENCY 0x000003fd
#define MSR_CORE_C7_RESIDENCY 0x000003fe
#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
#define MSR_PKG_C2_RESIDENCY 0x0000060d
#define MSR_PKG_C8_RESIDENCY 0x00000630
#define MSR_PKG_C9_RESIDENCY 0x00000631
#define MSR_PKG_C10_RESIDENCY 0x00000632
/* Interrupt Response Limit */
#define MSR_PKGC3_IRTL 0x0000060a
#define MSR_PKGC6_IRTL 0x0000060b
#define MSR_PKGC7_IRTL 0x0000060c
#define MSR_PKGC8_IRTL 0x00000633
#define MSR_PKGC9_IRTL 0x00000634
#define MSR_PKGC10_IRTL 0x00000635
/* Run Time Average Power Limiting (RAPL) Interface */
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
#define MSR_PKG_ENERGY_STATUS 0x00000611
#define MSR_PKG_PERF_STATUS 0x00000613
#define MSR_PKG_POWER_INFO 0x00000614
#define MSR_DRAM_POWER_LIMIT 0x00000618
#define MSR_DRAM_ENERGY_STATUS 0x00000619
#define MSR_DRAM_PERF_STATUS 0x0000061b
#define MSR_DRAM_POWER_INFO 0x0000061c
#define MSR_PP0_POWER_LIMIT 0x00000638
#define MSR_PP0_ENERGY_STATUS 0x00000639
#define MSR_PP0_POLICY 0x0000063a
#define MSR_PP0_PERF_STATUS 0x0000063b
#define MSR_PP1_POWER_LIMIT 0x00000640
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
/* Config TDP MSRs */
#define MSR_CONFIG_TDP_NOMINAL 0x00000648
#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
#define MSR_CONFIG_TDP_CONTROL 0x0000064B
#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
#define MSR_CORE_C1_RES 0x00000660
#define MSR_MODULE_C6_RES_MS 0x00000664
#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669
#define MSR_ATOM_CORE_RATIOS 0x0000066a
#define MSR_ATOM_CORE_VIDS 0x0000066b
#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
/* Hardware P state interface */
#define MSR_PPERF 0x0000064e
#define MSR_PERF_LIMIT_REASONS 0x0000064f
#define MSR_PM_ENABLE 0x00000770
#define MSR_HWP_CAPABILITIES 0x00000771
#define MSR_HWP_REQUEST_PKG 0x00000772
#define MSR_HWP_INTERRUPT 0x00000773
#define MSR_HWP_REQUEST 0x00000774
#define MSR_HWP_STATUS 0x00000777
/* CPUID.6.EAX */
#define HWP_BASE_BIT (1<<7)
#define HWP_NOTIFICATIONS_BIT (1<<8)
#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
/* IA32_HWP_CAPABILITIES */
#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
/* IA32_HWP_REQUEST */
#define HWP_MIN_PERF(x) (x & 0xff)
#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
#define HWP_EPP_PERFORMANCE 0x00
#define HWP_EPP_BALANCE_PERFORMANCE 0x80
#define HWP_EPP_BALANCE_POWERSAVE 0xC0
#define HWP_EPP_POWERSAVE 0xFF
#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
/* IA32_HWP_STATUS */
#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)
/* IA32_HWP_INTERRUPT */
#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)
#define MSR_AMD64_MC0_MASK 0xc0010044
#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
/* These are consecutive and not in the normal 4-register MCE bank block */
#define MSR_IA32_MC0_CTL2 0x00000280
#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186
#define MSR_P6_EVNTSEL1 0x00000187
#define MSR_KNC_PERFCTR0 0x00000020
#define MSR_KNC_PERFCTR1 0x00000021
#define MSR_KNC_EVNTSEL0 0x00000028
#define MSR_KNC_EVNTSEL1 0x00000029
/* Alternative perfctr range with full access. */
#define MSR_IA32_PMC0 0x000004c1
/* AMD64 MSRs. Not complete. See the architecture manual for a more
complete list. */
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
#define MSR_AMD64_IBSFETCH_REG_COUNT 3
#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
#define MSR_AMD64_IBSOPCTL 0xc0011033
#define MSR_AMD64_IBSOPRIP 0xc0011034
#define MSR_AMD64_IBSOPDATA 0xc0011035
#define MSR_AMD64_IBSOPDATA2 0xc0011036
#define MSR_AMD64_IBSOPDATA3 0xc0011037
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSOP_REG_COUNT 7
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
#define MSR_F15H_PTSC 0xc0010280
#define MSR_F15H_IC_CFG 0xc0011021
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
#define FAM10H_MMIO_CONF_ENABLE (1<<0)
#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
#define MSR_F10H_DECFG 0xc0011029
#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_K8_SYSCFG 0xc0010010
#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
#define MSR_K8_TSEG_ADDR 0xc0010112
#define MSR_K8_TSEG_MASK 0xc0010113
#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
/* K7 MSRs */
#define MSR_K7_EVNTSEL0 0xc0010000
#define MSR_K7_PERFCTR0 0xc0010004
#define MSR_K7_EVNTSEL1 0xc0010001
#define MSR_K7_PERFCTR1 0xc0010005
#define MSR_K7_EVNTSEL2 0xc0010002
#define MSR_K7_PERFCTR2 0xc0010006
#define MSR_K7_EVNTSEL3 0xc0010003
#define MSR_K7_PERFCTR3 0xc0010007
#define MSR_K7_CLK_CTL 0xc001001b
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
/* K6 MSRs */
#define MSR_K6_WHCR 0xc0000082
#define MSR_K6_UWCCR 0xc0000085
#define MSR_K6_EPMR 0xc0000086
#define MSR_K6_PSOR 0xc0000087
#define MSR_K6_PFIR 0xc0000088
/* Centaur-Hauls/IDT defined MSRs. */
#define MSR_IDT_FCR1 0x00000107
#define MSR_IDT_FCR2 0x00000108
#define MSR_IDT_FCR3 0x00000109
#define MSR_IDT_FCR4 0x0000010a
#define MSR_IDT_MCR0 0x00000110
#define MSR_IDT_MCR1 0x00000111
#define MSR_IDT_MCR2 0x00000112
#define MSR_IDT_MCR3 0x00000113
#define MSR_IDT_MCR4 0x00000114
#define MSR_IDT_MCR5 0x00000115
#define MSR_IDT_MCR6 0x00000116
#define MSR_IDT_MCR7 0x00000117
#define MSR_IDT_MCR_CTRL 0x00000120
/* VIA Cyrix defined MSRs*/
#define MSR_VIA_FCR 0x00001107
#define MSR_VIA_LONGHAUL 0x0000110a
#define MSR_VIA_RNG 0x0000110b
#define MSR_VIA_BCR2 0x00001147
/* Transmeta defined MSRs */
#define MSR_TMTA_LONGRUN_CTRL 0x80868010
#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
#define MSR_TMTA_LRTI_READOUT 0x80868018
#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
/* Intel defined MSRs. */
#define MSR_IA32_P5_MC_ADDR 0x00000000
#define MSR_IA32_P5_MC_TYPE 0x00000001
#define MSR_IA32_TSC 0x00000010
#define MSR_IA32_PLATFORM_ID 0x00000017
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_BNDCFGS 0x00000d90
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
#define MSR_IA32_XSS 0x00000da0
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
#define FEATURE_CONTROL_LMCE (1<<20)
#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
#define MSR_IA32_TSCDEADLINE 0x000006e0
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
#define MSR_IA32_SMBASE 0x0000009e
#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
#define MSR_AMD_PERF_STATUS 0xc0010063
#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
#define MSR_IA32_THERM_CONTROL 0x0000019a
#define MSR_IA32_THERM_INTERRUPT 0x0000019b
#define THERM_INT_HIGH_ENABLE (1 << 0)
#define THERM_INT_LOW_ENABLE (1 << 1)
#define THERM_INT_PLN_ENABLE (1 << 24)
#define MSR_IA32_THERM_STATUS 0x0000019c
#define THERM_STATUS_PROCHOT (1 << 0)
#define THERM_STATUS_POWER_LIMIT (1 << 10)
#define MSR_THERM2_CTL 0x0000019d
#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
#define MSR_IA32_MISC_ENABLE 0x000001a0
#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
#define MSR_MISC_FEATURE_CONTROL 0x000001a4
#define MSR_MISC_PWR_MGMT 0x000001aa
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
#define ENERGY_PERF_BIAS_PERFORMANCE 0
#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
#define ENERGY_PERF_BIAS_NORMAL 6
#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
#define ENERGY_PERF_BIAS_POWERSAVE 15
#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1
#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
/* Thermal Thresholds Support */
#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
#define THERM_SHIFT_THRESHOLD0 8
#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
#define THERM_SHIFT_THRESHOLD1 16
#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
#define THERM_STATUS_THRESHOLD0 (1 << 6)
#define THERM_LOG_THRESHOLD0 (1 << 7)
#define THERM_STATUS_THRESHOLD1 (1 << 8)
#define THERM_LOG_THRESHOLD1 (1 << 9)
/* MISC_ENABLE bits: architectural */
#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
/* MISC_FEATURES_ENABLES non-architectural features */
#define MSR_MISC_FEATURES_ENABLES 0x00000140
#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
#define MSR_IA32_TSC_DEADLINE 0x000006E0
/* P4/Xeon+ specific */
#define MSR_IA32_MCG_EAX 0x00000180
#define MSR_IA32_MCG_EBX 0x00000181
#define MSR_IA32_MCG_ECX 0x00000182
#define MSR_IA32_MCG_EDX 0x00000183
#define MSR_IA32_MCG_ESI 0x00000184
#define MSR_IA32_MCG_EDI 0x00000185
#define MSR_IA32_MCG_EBP 0x00000186
#define MSR_IA32_MCG_ESP 0x00000187
#define MSR_IA32_MCG_EFLAGS 0x00000188
#define MSR_IA32_MCG_EIP 0x00000189
#define MSR_IA32_MCG_RESERVED 0x0000018a
/* Pentium IV performance counter MSRs */
#define MSR_P4_BPU_PERFCTR0 0x00000300
#define MSR_P4_BPU_PERFCTR1 0x00000301
#define MSR_P4_BPU_PERFCTR2 0x00000302
#define MSR_P4_BPU_PERFCTR3 0x00000303
#define MSR_P4_MS_PERFCTR0 0x00000304
#define MSR_P4_MS_PERFCTR1 0x00000305
#define MSR_P4_MS_PERFCTR2 0x00000306
#define MSR_P4_MS_PERFCTR3 0x00000307
#define MSR_P4_FLAME_PERFCTR0 0x00000308
#define MSR_P4_FLAME_PERFCTR1 0x00000309
#define MSR_P4_FLAME_PERFCTR2 0x0000030a
#define MSR_P4_FLAME_PERFCTR3 0x0000030b
#define MSR_P4_IQ_PERFCTR0 0x0000030c
#define MSR_P4_IQ_PERFCTR1 0x0000030d
#define MSR_P4_IQ_PERFCTR2 0x0000030e
#define MSR_P4_IQ_PERFCTR3 0x0000030f
#define MSR_P4_IQ_PERFCTR4 0x00000310
#define MSR_P4_IQ_PERFCTR5 0x00000311
#define MSR_P4_BPU_CCCR0 0x00000360
#define MSR_P4_BPU_CCCR1 0x00000361
#define MSR_P4_BPU_CCCR2 0x00000362
#define MSR_P4_BPU_CCCR3 0x00000363
#define MSR_P4_MS_CCCR0 0x00000364
#define MSR_P4_MS_CCCR1 0x00000365
#define MSR_P4_MS_CCCR2 0x00000366
#define MSR_P4_MS_CCCR3 0x00000367
#define MSR_P4_FLAME_CCCR0 0x00000368
#define MSR_P4_FLAME_CCCR1 0x00000369
#define MSR_P4_FLAME_CCCR2 0x0000036a
#define MSR_P4_FLAME_CCCR3 0x0000036b
#define MSR_P4_IQ_CCCR0 0x0000036c
#define MSR_P4_IQ_CCCR1 0x0000036d
#define MSR_P4_IQ_CCCR2 0x0000036e
#define MSR_P4_IQ_CCCR3 0x0000036f
#define MSR_P4_IQ_CCCR4 0x00000370
#define MSR_P4_IQ_CCCR5 0x00000371
#define MSR_P4_ALF_ESCR0 0x000003ca
#define MSR_P4_ALF_ESCR1 0x000003cb
#define MSR_P4_BPU_ESCR0 0x000003b2
#define MSR_P4_BPU_ESCR1 0x000003b3
#define MSR_P4_BSU_ESCR0 0x000003a0
#define MSR_P4_BSU_ESCR1 0x000003a1
#define MSR_P4_CRU_ESCR0 0x000003b8
#define MSR_P4_CRU_ESCR1 0x000003b9
#define MSR_P4_CRU_ESCR2 0x000003cc
#define MSR_P4_CRU_ESCR3 0x000003cd
#define MSR_P4_CRU_ESCR4 0x000003e0
#define MSR_P4_CRU_ESCR5 0x000003e1
#define MSR_P4_DAC_ESCR0 0x000003a8
#define MSR_P4_DAC_ESCR1 0x000003a9
#define MSR_P4_FIRM_ESCR0 0x000003a4
#define MSR_P4_FIRM_ESCR1 0x000003a5
#define MSR_P4_FLAME_ESCR0 0x000003a6
#define MSR_P4_FLAME_ESCR1 0x000003a7
#define MSR_P4_FSB_ESCR0 0x000003a2
#define MSR_P4_FSB_ESCR1 0x000003a3
#define MSR_P4_IQ_ESCR0 0x000003ba
#define MSR_P4_IQ_ESCR1 0x000003bb
#define MSR_P4_IS_ESCR0 0x000003b4
#define MSR_P4_IS_ESCR1 0x000003b5
#define MSR_P4_ITLB_ESCR0 0x000003b6
#define MSR_P4_ITLB_ESCR1 0x000003b7
#define MSR_P4_IX_ESCR0 0x000003c8
#define MSR_P4_IX_ESCR1 0x000003c9
#define MSR_P4_MOB_ESCR0 0x000003aa
#define MSR_P4_MOB_ESCR1 0x000003ab
#define MSR_P4_MS_ESCR0 0x000003c0
#define MSR_P4_MS_ESCR1 0x000003c1
#define MSR_P4_PMH_ESCR0 0x000003ac
#define MSR_P4_PMH_ESCR1 0x000003ad
#define MSR_P4_RAT_ESCR0 0x000003bc
#define MSR_P4_RAT_ESCR1 0x000003bd
#define MSR_P4_SAAT_ESCR0 0x000003ae
#define MSR_P4_SAAT_ESCR1 0x000003af
#define MSR_P4_SSU_ESCR0 0x000003be
#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */
#define MSR_P4_TBPU_ESCR0 0x000003c2
#define MSR_P4_TBPU_ESCR1 0x000003c3
#define MSR_P4_TC_ESCR0 0x000003c4
#define MSR_P4_TC_ESCR1 0x000003c5
#define MSR_P4_U2L_ESCR0 0x000003b0
#define MSR_P4_U2L_ESCR1 0x000003b1
#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
/* Intel Core-based CPU performance counters */
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900
/* Intel VT MSRs */
#define MSR_IA32_VMX_BASIC 0x00000480
#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
#define MSR_IA32_VMX_MISC 0x00000485
#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
#define MSR_IA32_VMX_VMFUNC 0x00000491
/* VMX_BASIC bits and bitmasks */
#define VMX_BASIC_VMCS_SIZE_SHIFT 32
#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
#define VMX_BASIC_64 0x0001000000000000LLU
#define VMX_BASIC_MEM_TYPE_SHIFT 50
#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
#define VMX_BASIC_MEM_TYPE_WB 6LLU
#define VMX_BASIC_INOUT 0x0040000000000000LLU
/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117
#endif /* !SELFTEST_KVM_X86_H */
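/*
 * Illustrative sketch, not part of the patch: the accessors above are meant
 * to be used from guest code. Hypothetical example built only from helpers
 * and constants defined in this header (assumes x86.h is included).
 */
static inline uint64_t x86_helpers_example(void)
{
	uint32_t aux;
	uint64_t start = rdtsc();

	/* Enable CR4.OSFXSR (FXSAVE/FXRSTOR support) if it is clear. */
	if (!(get_cr4() & X86_CR4_OSFXSR))
		set_cr4(get_cr4() | X86_CR4_OSFXSR);

	/* Cycles spent; rdtscp also waits for earlier instructions. */
	return rdtscp(&aux) - start;
}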
/*
* tools/testing/selftests/kvm/lib/assert.c
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/
#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
#include "test_util.h"
#include <execinfo.h>
#include <sys/syscall.h>
/* Dumps the current stack trace to stderr. */
static void __attribute__((noinline)) test_dump_stack(void);
static void test_dump_stack(void)
{
/*
* Build and run this command:
*
* addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
* grep -v test_dump_stack | cat -n 1>&2
*
* Note that the spacing is different and there's no newline.
*/
size_t i;
size_t n = 20;
void *stack[n];
const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
const char *pipeline = "|cat -n 1>&2";
char cmd[strlen(addr2line) + strlen(pipeline) +
/* N bytes per addr * 2 digits per byte + 1 space per addr: */
n * (((sizeof(void *)) * 2) + 1) +
/* Null terminator: */
1];
char *c;
n = backtrace(stack, n);
c = &cmd[0];
c += sprintf(c, "%s", addr2line);
/*
* Skip the first 3 frames: backtrace, test_dump_stack, and
* test_assert. We hope that backtrace isn't inlined and the other two
* we've declared noinline.
*/
for (i = 2; i < n; i++)
c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
c += sprintf(c, "%s", pipeline);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
system(cmd);
#pragma GCC diagnostic pop
}
static pid_t gettid(void)
{
return syscall(SYS_gettid);
}
void __attribute__((noinline))
test_assert(bool exp, const char *exp_str,
const char *file, unsigned int line, const char *fmt, ...)
{
va_list ap;
if (!(exp)) {
va_start(ap, fmt);
fprintf(stderr, "==== Test Assertion Failure ====\n"
" %s:%u: %s\n"
" pid=%d tid=%d\n",
file, line, exp_str, getpid(), gettid());
test_dump_stack();
if (fmt) {
fputs(" ", stderr);
vfprintf(stderr, fmt, ap);
fputs("\n", stderr);
}
va_end(ap);
exit(254);
}
return;
}
/*
* tools/testing/selftests/kvm/lib/kvm_util.c
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/
#include "test_util.h"
#include "kvm_util.h"
#include "kvm_util_internal.h"
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#define KVM_DEV_PATH "/dev/kvm"
#define KVM_UTIL_PGS_PER_HUGEPG 512
#define KVM_UTIL_MIN_PADDR 0x2000
/* Aligns x up to the next multiple of size. Size must be a power of 2. */
static void *align(void *x, size_t size)
{
size_t mask = size - 1;
TEST_ASSERT(size != 0 && !(size & (size - 1)),
"size not a power of 2: %lu", size);
return (void *) (((size_t) x + mask) & ~mask);
}
/* Capability
*
* Input Args:
* cap - Capability
*
* Output Args: None
*
* Return:
* On success, the Value corresponding to the capability (KVM_CAP_*)
* specified by the value of cap. On failure a TEST_ASSERT failure
* is produced.
*
* Looks up and returns the value corresponding to the capability
* (KVM_CAP_*) given by cap.
*/
int kvm_check_cap(long cap)
{
int ret;
int kvm_fd;
kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
KVM_DEV_PATH, kvm_fd, errno);
ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
" rc: %i errno: %i", ret, errno);
close(kvm_fd);
return ret;
}
/* VM Create
*
* Input Args:
* mode - VM Mode (e.g. VM_MODE_FLAT48PG)
* phy_pages - Physical memory pages
* perm - permission
*
* Output Args: None
*
* Return:
* Pointer to opaque structure that describes the created VM.
*
* Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
* When phy_pages is non-zero, a memory region of phy_pages physical pages
* is created and mapped starting at guest physical address 0. The file
* descriptor to control the created VM is created with the permissions
* given by perm (e.g. O_RDWR).
*/
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
struct kvm_vm *vm;
int kvm_fd;
/* Allocate memory. */
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficient Memory");
vm->mode = mode;
kvm_fd = open(KVM_DEV_PATH, perm);
TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
KVM_DEV_PATH, kvm_fd, errno);
/* Create VM. */
vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
"rc: %i errno: %i", vm->fd, errno);
close(kvm_fd);
/* Setup mode specific traits. */
switch (vm->mode) {
case VM_MODE_FLAT48PG:
vm->page_size = 0x1000;
vm->page_shift = 12;
/* Limit to 48-bit canonical virtual addresses. */
vm->vpages_valid = sparsebit_alloc();
sparsebit_set_num(vm->vpages_valid,
0, (1ULL << (48 - 1)) >> vm->page_shift);
sparsebit_set_num(vm->vpages_valid,
(~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
(1ULL << (48 - 1)) >> vm->page_shift);
/* Limit physical addresses to 52-bits. */
vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
break;
default:
TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
}
/* Allocate and setup memory for guest. */
vm->vpages_mapped = sparsebit_alloc();
if (phy_pages != 0)
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
0, 0, phy_pages, 0);
return vm;
}
/* Userspace Memory Region Find
*
* Input Args:
* vm - Virtual Machine
* start - Starting VM physical address
* end - Ending VM physical address, inclusive.
*
* Output Args: None
*
* Return:
* Pointer to overlapping region, NULL if no such region.
*
* Searches for a region with any physical memory that overlaps with
* any portion of the guest physical addresses from start to end
* inclusive. If multiple overlapping regions exist, a pointer to any
* of the regions is returned. Null is returned only when no overlapping
* region exists.
*/
static struct userspace_mem_region *userspace_mem_region_find(
struct kvm_vm *vm, uint64_t start, uint64_t end)
{
struct userspace_mem_region *region;
for (region = vm->userspace_mem_region_head; region;
region = region->next) {
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
if (start <= existing_end && end >= existing_start)
return region;
}
return NULL;
}
/* KVM Userspace Memory Region Find
*
* Input Args:
* vm - Virtual Machine
* start - Starting VM physical address
* end - Ending VM physical address, inclusive.
*
* Output Args: None
*
* Return:
* Pointer to overlapping region, NULL if no such region.
*
* Public interface to userspace_mem_region_find. Allows tests to look up
* the memslot datastructure for a given range of guest physical memory.
*/
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
uint64_t end)
{
struct userspace_mem_region *region;
region = userspace_mem_region_find(vm, start, end);
if (!region)
return NULL;
return &region->region;
}
/* VCPU Find
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
*
* Output Args: None
*
* Return:
* Pointer to VCPU structure
*
* Locates a vcpu structure that describes the VCPU specified by vcpuid and
* returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
* for the specified vcpuid.
*/
struct vcpu *vcpu_find(struct kvm_vm *vm,
uint32_t vcpuid)
{
struct vcpu *vcpup;
for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
if (vcpup->id == vcpuid)
return vcpup;
}
return NULL;
}
/* VM VCPU Remove
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
*
* Output Args: None
*
* Return: None, TEST_ASSERT failures for all error conditions
*
* Within the VM specified by vm, removes the VCPU given by vcpuid.
*/
static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret = close(vcpu->fd);
TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
"errno: %i", ret, errno);
if (vcpu->next)
vcpu->next->prev = vcpu->prev;
if (vcpu->prev)
vcpu->prev->next = vcpu->next;
else
vm->vcpu_head = vcpu->next;
free(vcpu);
}
/* Destroys and frees the VM pointed to by vmp.
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
int ret;
if (vmp == NULL)
return;
/* Free userspace_mem_regions. */
while (vmp->userspace_mem_region_head) {
struct userspace_mem_region *region
= vmp->userspace_mem_region_head;
region->region.memory_size = 0;
ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
&region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
"rc: %i errno: %i", ret, errno);
vmp->userspace_mem_region_head = region->next;
sparsebit_free(&region->unused_phy_pages);
ret = munmap(region->mmap_start, region->mmap_size);
TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
ret, errno);
free(region);
}
/* Free VCPUs. */
while (vmp->vcpu_head)
vm_vcpu_rm(vmp, vmp->vcpu_head->id);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
sparsebit_free(&vmp->vpages_mapped);
/* Close file descriptor for the VM. */
ret = close(vmp->fd);
TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
" vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
/* Free the structure describing the VM. */
free(vmp);
}
/* Memory Compare, host virtual to guest virtual
*
* Input Args:
* hva - Starting host virtual address
* vm - Virtual Machine
* gva - Starting guest virtual address
* len - number of bytes to compare
*
* Output Args: None
*
* Input/Output Args: None
*
* Return:
* Returns 0 if the bytes starting at hva for a length of len
* are equal to the guest virtual bytes starting at gva. Returns
* a value < 0, if bytes at hva are less than those at gva.
* Otherwise a value > 0 is returned.
*
* Compares the bytes starting at the host virtual address hva, for
* a length of len, to the guest bytes starting at the guest virtual
* address given by gva.
*/
int kvm_memcmp_hva_gva(void *hva,
struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
size_t amt;
/* Compare a batch of bytes until either a mismatch is found
* or all the bytes have been compared.
*/
for (uintptr_t offset = 0; offset < len; offset += amt) {
uintptr_t ptr1 = (uintptr_t)hva + offset;
/* Determine host address for guest virtual address
* at offset.
*/
uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
/* Determine amount to compare on this pass.
* Don't allow the comparison to cross a page boundary.
*/
amt = len - offset;
if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
amt = vm->page_size - (ptr1 % vm->page_size);
if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
amt = vm->page_size - (ptr2 % vm->page_size);
assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
/* Perform the comparison. If there is a difference
* return that result to the caller; otherwise continue
* looking for a mismatch.
*/
int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
if (ret != 0)
return ret;
}
/* No mismatch found. Let the caller know the two memory
* areas are equal.
*/
return 0;
}
/* Allocate an instance of struct kvm_cpuid2
*
* Input Args: None
*
* Output Args: None
*
* Return: A pointer to the allocated struct. The caller is responsible
* for freeing this struct.
*
* Since kvm_cpuid2 uses a 0-length array to allow the size of the
* array to be decided at allocation time, allocation is slightly
* complicated. This function uses a reasonable default length for
* the array and performs the appropriate allocation.
*/
struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
{
struct kvm_cpuid2 *cpuid;
int nent = 100;
size_t size;
size = sizeof(*cpuid);
size += nent * sizeof(struct kvm_cpuid_entry2);
cpuid = malloc(size);
if (!cpuid) {
perror("malloc");
abort();
}
cpuid->nent = nent;
return cpuid;
}
/* KVM Supported CPUID Get
*
* Input Args: None
*
* Output Args:
* cpuid - The supported KVM CPUID
*
* Return: void
*
* Get the guest CPUID supported by KVM.
*/
void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid)
{
int ret;
int kvm_fd;
kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
KVM_DEV_PATH, kvm_fd, errno);
ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
ret, errno);
close(kvm_fd);
}
/* Locate a cpuid entry.
*
* Input Args:
* cpuid: The cpuid.
* function: The function of the cpuid entry to find.
*
* Output Args: None
*
* Return: A pointer to the cpuid entry. Never returns NULL.
*/
struct kvm_cpuid_entry2 *
find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
uint32_t index)
{
struct kvm_cpuid_entry2 *entry = NULL;
int i;
for (i = 0; i < cpuid->nent; i++) {
if (cpuid->entries[i].function == function &&
cpuid->entries[i].index == index) {
entry = &cpuid->entries[i];
break;
}
}
TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
function, index);
return entry;
}
/* VM Userspace Memory Region Add
*
* Input Args:
* vm - Virtual Machine
* src_type - Storage source for this region
*            (e.g. VM_MEM_SRC_ANONYMOUS)
* guest_paddr - Starting guest physical address
* slot - KVM region slot
* npages - Number of physical pages
* flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
*
* Output Args: None
*
* Return: None
*
* Allocates a memory area of the number of pages specified by npages
* and maps it to the VM specified by vm, at a starting physical address
* given by guest_paddr. The region is created with a KVM region slot
* given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
* region is created with the flags given by flags.
*/
void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
uint64_t guest_paddr, uint32_t slot, uint64_t npages,
uint32_t flags)
{
int ret;
unsigned long pmem_size = 0;
struct userspace_mem_region *region;
size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
"address not on a page boundary.\n"
" guest_paddr: 0x%lx vm->page_size: 0x%x",
guest_paddr, vm->page_size);
TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
<= vm->max_gfn, "Physical range beyond maximum "
"supported physical address,\n"
" guest_paddr: 0x%lx npages: 0x%lx\n"
" vm->max_gfn: 0x%lx vm->page_size: 0x%x",
guest_paddr, npages, vm->max_gfn, vm->page_size);
/* Confirm a mem region with an overlapping address doesn't
* already exist.
*/
region = (struct userspace_mem_region *) userspace_mem_region_find(
vm, guest_paddr, guest_paddr + npages * vm->page_size);
if (region != NULL)
TEST_ASSERT(false, "overlapping userspace_mem_region already "
"exists\n"
" requested guest_paddr: 0x%lx npages: 0x%lx "
"page_size: 0x%x\n"
" existing guest_paddr: 0x%lx size: 0x%lx",
guest_paddr, npages, vm->page_size,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
for (region = vm->userspace_mem_region_head; region;
region = region->next) {
if (region->region.slot == slot)
break;
if ((guest_paddr <= (region->region.guest_phys_addr
+ region->region.memory_size))
&& ((guest_paddr + npages * vm->page_size)
>= region->region.guest_phys_addr))
break;
}
if (region != NULL)
TEST_ASSERT(false, "A mem region with the requested slot "
"or overlapping physical memory range already exists.\n"
" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
" existing slot: %u paddr: 0x%lx size: 0x%lx",
slot, guest_paddr, npages,
region->region.slot,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
/* Allocate and initialize new mem region structure. */
region = calloc(1, sizeof(*region));
TEST_ASSERT(region != NULL, "Insufficient Memory");
region->mmap_size = npages * vm->page_size;
/* Enough memory to align up to a huge page. */
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
region->mmap_size += huge_page_size;
region->mmap_start = mmap(NULL, region->mmap_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS
| (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
-1, 0);
TEST_ASSERT(region->mmap_start != MAP_FAILED,
"test_malloc failed, mmap_start: %p errno: %i",
region->mmap_start, errno);
/* Align THP allocation up to start of a huge page. */
region->host_mem = align(region->mmap_start,
src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1);
/* As needed perform madvise */
if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
ret = madvise(region->host_mem, npages * vm->page_size,
src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
TEST_ASSERT(ret == 0, "madvise failed,\n"
" addr: %p\n"
" length: 0x%lx\n"
" src_type: %x",
region->host_mem, npages * vm->page_size, src_type);
}
region->unused_phy_pages = sparsebit_alloc();
sparsebit_set_num(region->unused_phy_pages,
guest_paddr >> vm->page_shift, npages);
region->region.slot = slot;
region->region.flags = flags;
region->region.guest_phys_addr = guest_paddr;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
" guest_phys_addr: 0x%lx size: 0x%lx",
ret, errno, slot, flags,
guest_paddr, (uint64_t) region->region.memory_size);
/* Add to linked-list of memory regions. */
if (vm->userspace_mem_region_head)
vm->userspace_mem_region_head->prev = region;
region->next = vm->userspace_mem_region_head;
vm->userspace_mem_region_head = region;
}
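/* Example usage (an illustrative sketch): back 64 guest pages starting at
 * guest physical address 0x100000 with anonymous memory in slot 1 and no
 * special flags.  The vm pointer and the chosen gpa/slot are hypothetical;
 * the gpa must be page aligned and below vm->max_gfn.
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 *		0x100000, 1, 64, 0);
 */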
/* Memslot to region
*
* Input Args:
* vm - Virtual Machine
* memslot - KVM memory slot ID
*
* Output Args: None
*
* Return:
 *   Pointer to memory region structure that describes the memory region
* using kvm memory slot ID given by memslot. TEST_ASSERT failure
* on error (e.g. currently no memory region using memslot as a KVM
* memory slot ID).
*/
static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
uint32_t memslot)
{
struct userspace_mem_region *region;
for (region = vm->userspace_mem_region_head; region;
region = region->next) {
if (region->region.slot == memslot)
break;
}
if (region == NULL) {
fprintf(stderr, "No mem region with the requested slot found,\n"
" requested slot: %u\n", memslot);
fputs("---- vm dump ----\n", stderr);
vm_dump(stderr, vm, 2);
TEST_ASSERT(false, "Mem region not found");
}
return region;
}
/* VM Memory Region Flags Set
*
* Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to modify
 *   flags - Flags to set for the memory region (e.g. KVM_MEM_LOG_DIRTY_PAGES)
*
* Output Args: None
*
* Return: None
*
* Sets the flags of the memory region specified by the value of slot,
* to the values given by flags.
*/
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
int ret;
struct userspace_mem_region *region;
/* Locate memory region. */
region = memslot2region(vm, slot);
region->region.flags = flags;
ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i slot: %u flags: 0x%x",
ret, errno, slot, flags);
}
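/* Example usage (an illustrative sketch): enable dirty logging on the region
 * in slot 1 and later disable it again.  Slot 1 is hypothetical and must have
 * been added earlier with vm_userspace_mem_region_add().
 *
 *	vm_mem_region_set_flags(vm, 1, KVM_MEM_LOG_DIRTY_PAGES);
 *	...
 *	vm_mem_region_set_flags(vm, 1, 0);
 */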
/* VCPU mmap Size
*
* Input Args: None
*
* Output Args: None
*
* Return:
* Size of VCPU state
*
* Returns the size of the structure pointed to by the return value
* of vcpu_state().
*/
static int vcpu_mmap_sz(void)
{
int dev_fd, ret;
dev_fd = open(KVM_DEV_PATH, O_RDONLY);
TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i",
__func__, KVM_DEV_PATH, dev_fd, errno);
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
TEST_ASSERT(ret >= sizeof(struct kvm_run),
"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
__func__, ret, errno);
close(dev_fd);
return ret;
}
/* VM VCPU Add
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
*
* Output Args: None
*
* Return: None
*
 * Creates and adds to the VM specified by vm a virtual CPU with
 * the ID given by vcpuid.
*/
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu;
/* Confirm a vcpu with the specified id doesn't already exist. */
vcpu = vcpu_find(vm, vcpuid);
if (vcpu != NULL)
TEST_ASSERT(false, "vcpu with the specified id "
"already exists,\n"
" requested vcpuid: %u\n"
" existing vcpuid: %u state: %p",
vcpuid, vcpu->id, vcpu->state);
/* Allocate and initialize new vcpu structure. */
vcpu = calloc(1, sizeof(*vcpu));
TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
vcpu->id = vcpuid;
vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
vcpu->fd, errno);
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
vcpu_mmap_sz(), sizeof(*vcpu->state));
vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
"vcpu id: %u errno: %i", vcpuid, errno);
/* Add to linked-list of VCPUs. */
if (vm->vcpu_head)
vm->vcpu_head->prev = vcpu;
vcpu->next = vm->vcpu_head;
vm->vcpu_head = vcpu;
vcpu_setup(vm, vcpuid);
}
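/* Example usage (an illustrative sketch): a minimal VM bring-up sequence
 * using the helpers above.  The mode, page count and permissions shown are
 * only one plausible choice, using the defaults declared in kvm_util.h.
 *
 *	struct kvm_vm *vm;
 *
 *	vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
 *	vm_create_irqchip(vm);
 *	vm_vcpu_add(vm, 0);
 */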
/* VM Virtual Address Unused Gap
*
* Input Args:
* vm - Virtual Machine
* sz - Size (bytes)
* vaddr_min - Minimum Virtual Address
*
* Output Args: None
*
* Return:
 *   Lowest virtual address at or above vaddr_min, with at least
* sz unused bytes. TEST_ASSERT failure if no area of at least
* size sz is available.
*
* Within the VM specified by vm, locates the lowest starting virtual
* address >= vaddr_min, that has at least sz unallocated bytes. A
 * TEST_ASSERT failure occurs for invalid input or if no area of at least
* sz unallocated bytes >= vaddr_min is available.
*/
static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
vm_vaddr_t vaddr_min)
{
uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
/* Determine lowest permitted virtual page index. */
uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
if ((pgidx_start * vm->page_size) < vaddr_min)
goto no_va_found;
/* Loop over section with enough valid virtual page indexes. */
if (!sparsebit_is_set_num(vm->vpages_valid,
pgidx_start, pages))
pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
pgidx_start, pages);
do {
/*
* Are there enough unused virtual pages available at
* the currently proposed starting virtual page index.
* If not, adjust proposed starting index to next
* possible.
*/
if (sparsebit_is_clear_num(vm->vpages_mapped,
pgidx_start, pages))
goto va_found;
pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
pgidx_start, pages);
if (pgidx_start == 0)
goto no_va_found;
/*
* If needed, adjust proposed starting virtual address,
* to next range of valid virtual addresses.
*/
if (!sparsebit_is_set_num(vm->vpages_valid,
pgidx_start, pages)) {
pgidx_start = sparsebit_next_set_num(
vm->vpages_valid, pgidx_start, pages);
if (pgidx_start == 0)
goto no_va_found;
}
} while (pgidx_start != 0);
no_va_found:
TEST_ASSERT(false, "No vaddr of specified pages available, "
"pages: 0x%lx", pages);
/* NOT REACHED */
return -1;
va_found:
TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
pgidx_start, pages),
"Unexpected, invalid virtual page index range,\n"
" pgidx_start: 0x%lx\n"
" pages: 0x%lx",
pgidx_start, pages);
TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
pgidx_start, pages),
"Unexpected, pages already mapped,\n"
" pgidx_start: 0x%lx\n"
" pages: 0x%lx",
pgidx_start, pages);
return pgidx_start * vm->page_size;
}
/* VM Virtual Address Allocate
*
* Input Args:
* vm - Virtual Machine
* sz - Size in bytes
* vaddr_min - Minimum starting virtual address
* data_memslot - Memory region slot for data pages
* pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
* Return:
* Starting guest virtual address
*
* Allocates at least sz bytes within the virtual address space of the vm
* given by vm. The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
* a page.
*/
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
uint32_t data_memslot, uint32_t pgd_memslot)
{
uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
virt_pgd_alloc(vm, pgd_memslot);
/* Find an unused range of virtual page addresses of at least
* pages in length.
*/
vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
/* Map the virtual pages. */
for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
pages--, vaddr += vm->page_size) {
vm_paddr_t paddr;
paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
virt_pg_map(vm, vaddr, paddr, pgd_memslot);
sparsebit_set(vm->vpages_mapped,
vaddr >> vm->page_shift);
}
return vaddr_start;
}
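/* Example usage (an illustrative sketch): reserve one 4K page of guest
 * virtual address space for test data, placing both the data page and any
 * page tables that need to be created in memslot 0.  The slot numbers are
 * hypothetical.
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc(vm, 0x1000, KVM_UTIL_MIN_VADDR, 0, 0);
 */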
/* Address VM Physical to Host Virtual
*
* Input Args:
* vm - Virtual Machine
* gpa - VM physical address
*
* Output Args: None
*
* Return:
* Equivalent host virtual address
*
* Locates the memory region containing the VM physical address given
* by gpa, within the VM given by vm. When found, the host virtual
* address providing the memory to the vm physical address is returned.
* A TEST_ASSERT failure occurs if no region containing gpa exists.
*/
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
for (region = vm->userspace_mem_region_head; region;
region = region->next) {
if ((gpa >= region->region.guest_phys_addr)
&& (gpa <= (region->region.guest_phys_addr
+ region->region.memory_size - 1)))
return (void *) ((uintptr_t) region->host_mem
+ (gpa - region->region.guest_phys_addr));
}
TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
return NULL;
}
/* Address Host Virtual to VM Physical
*
* Input Args:
* vm - Virtual Machine
* hva - Host virtual address
*
* Output Args: None
*
* Return:
* Equivalent VM physical address
*
* Locates the memory region containing the host virtual address given
* by hva, within the VM given by vm. When found, the equivalent
* VM physical address is returned. A TEST_ASSERT failure occurs if no
* region containing hva exists.
*/
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
struct userspace_mem_region *region;
for (region = vm->userspace_mem_region_head; region;
region = region->next) {
if ((hva >= region->host_mem)
&& (hva <= (region->host_mem
+ region->region.memory_size - 1)))
return (vm_paddr_t) ((uintptr_t)
region->region.guest_phys_addr
+ (hva - (uintptr_t) region->host_mem));
}
TEST_ASSERT(false, "No mapping to a guest physical address, "
"hva: %p", hva);
return -1;
}
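/* Example usage (an illustrative sketch): fill guest memory from the host by
 * translating a guest physical address to the host virtual address backing
 * it, then translate the host pointer back.  The gpa below is hypothetical
 * and must lie inside a region added with vm_userspace_mem_region_add().
 *
 *	uint8_t *host = addr_gpa2hva(vm, 0x100000);
 *	vm_paddr_t gpa;
 *
 *	memset(host, 0xae, 4096);
 *	gpa = addr_hva2gpa(vm, host);
 *
 * In this sketch gpa compares equal to the original 0x100000.
 */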
/* VM Create IRQ Chip
*
* Input Args:
* vm - Virtual Machine
*
* Output Args: None
*
* Return: None
*
* Creates an interrupt controller chip for the VM specified by vm.
*/
void vm_create_irqchip(struct kvm_vm *vm)
{
int ret;
ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
"rc: %i errno: %i", ret, errno);
}
/* VM VCPU State
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
*
* Output Args: None
*
* Return:
* Pointer to structure that describes the state of the VCPU.
*
* Locates and returns a pointer to a structure that describes the
* state of the VCPU with the given vcpuid.
*/
struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
return vcpu->state;
}
/* VM VCPU Run
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
*
* Output Args: None
*
* Return: None
*
* Switch to executing the code for the VCPU given by vcpuid, within the VM
* given by vm.
*/
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
int ret = _vcpu_run(vm, vcpuid);
TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
"rc: %i errno: %i", ret, errno);
}
int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int rc;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
do {
rc = ioctl(vcpu->fd, KVM_RUN, NULL);
} while (rc == -1 && errno == EINTR);
return rc;
}
/* VM VCPU Set MP State
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
* mp_state - mp_state to be set
*
* Output Args: None
*
* Return: None
*
* Sets the MP state of the VCPU given by vcpuid, to the state given
* by mp_state.
*/
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_mp_state *mp_state)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
"rc: %i errno: %i", ret, errno);
}
/* VM VCPU Regs Get
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
*
* Output Args:
* regs - current state of VCPU regs
*
* Return: None
*
* Obtains the current register state for the VCPU specified by vcpuid
* and stores it at the location given by regs.
*/
void vcpu_regs_get(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_regs *regs)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
/* Get the regs. */
ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
ret, errno);
}
/* VM VCPU Regs Set
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
* regs - Values to set VCPU regs to
*
* Output Args: None
*
* Return: None
*
* Sets the regs of the VCPU specified by vcpuid to the values
* given by regs.
*/
void vcpu_regs_set(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_regs *regs)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
/* Set the regs. */
ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
ret, errno);
}
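/* VM VCPU Events Get
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args:
 *   events - Currently pending VCPU events
 *
 * Return: None
 *
 * Obtains the currently pending events of the VCPU specified by vcpuid
 * and stores them at the location given by events.
 */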
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	/* Get the events. */
ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
ret, errno);
}
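/* VM VCPU Events Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   events - VCPU events to set
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the pending events of the VCPU specified by vcpuid to the values
 * given by events.
 */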
void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	/* Set the events. */
ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
ret, errno);
}
/* VM VCPU Args Set
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
* num - number of arguments
* ... - arguments, each of type uint64_t
*
* Output Args: None
*
* Return: None
*
* Sets the first num function input arguments to the values
* given as variable args. Each of the variable args is expected to
* be of type uint64_t.
*/
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
{
va_list ap;
struct kvm_regs regs;
TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
" num: %u\n",
num);
va_start(ap, num);
vcpu_regs_get(vm, vcpuid, &regs);
if (num >= 1)
regs.rdi = va_arg(ap, uint64_t);
if (num >= 2)
regs.rsi = va_arg(ap, uint64_t);
if (num >= 3)
regs.rdx = va_arg(ap, uint64_t);
if (num >= 4)
regs.rcx = va_arg(ap, uint64_t);
if (num >= 5)
regs.r8 = va_arg(ap, uint64_t);
if (num >= 6)
regs.r9 = va_arg(ap, uint64_t);
vcpu_regs_set(vm, vcpuid, &regs);
va_end(ap);
}
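/* Example usage (an illustrative sketch): pass two arguments to the guest
 * entry point before running the VCPU.  The values land in RDI and RSI,
 * following the x86_64 SysV register order used above.  The guest_gva
 * variable is hypothetical.
 *
 *	vcpu_args_set(vm, 0, 2, (uint64_t) guest_gva, (uint64_t) 42);
 *	vcpu_run(vm, 0);
 */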
/* VM VCPU System Regs Get
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
*
* Output Args:
* sregs - current state of VCPU system regs
*
* Return: None
*
* Obtains the current system register state for the VCPU specified by
* vcpuid and stores it at the location given by sregs.
*/
void vcpu_sregs_get(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_sregs *sregs)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	/* Get the sregs. */
ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
ret, errno);
}
/* VM VCPU System Regs Set
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
* sregs - Values to set VCPU system regs to
*
* Output Args: None
*
* Return: None
*
* Sets the system regs of the VCPU specified by vcpuid to the values
* given by sregs.
*/
void vcpu_sregs_set(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_sregs *sregs)
{
int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
"rc: %i errno: %i", ret, errno);
}
int _vcpu_sregs_set(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_sregs *sregs)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	/* Set the sregs. */
return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}
/* VCPU Ioctl
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
* cmd - Ioctl number
* arg - Argument to pass to the ioctl
*
* Return: None
*
* Issues an arbitrary ioctl on a VCPU fd.
*/
void vcpu_ioctl(struct kvm_vm *vm,
uint32_t vcpuid, unsigned long cmd, void *arg)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
ret = ioctl(vcpu->fd, cmd, arg);
TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
cmd, ret, errno, strerror(errno));
}
/* VM Ioctl
*
* Input Args:
* vm - Virtual Machine
* cmd - Ioctl number
* arg - Argument to pass to the ioctl
*
* Return: None
*
* Issues an arbitrary ioctl on a VM fd.
*/
void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
int ret;
ret = ioctl(vm->fd, cmd, arg);
TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
cmd, ret, errno, strerror(errno));
}
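/* Example usage (an illustrative sketch): issue an ioctl that has no
 * dedicated wrapper.  KVM_SET_IDENTITY_MAP_ADDR is shown only as one example
 * of a VM-level ioctl; any command accepted by the underlying fd can be
 * issued the same way.
 *
 *	uint64_t ident_map_addr = KVM_DEFAULT_IDENTITY_MAP_ADDRESS;
 *
 *	vm_ioctl(vm, KVM_SET_IDENTITY_MAP_ADDR, &ident_map_addr);
 */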
/* VM Dump
*
* Input Args:
* vm - Virtual Machine
* indent - Left margin indent amount
*
* Output Args:
* stream - Output FILE stream
*
* Return: None
*
* Dumps the current state of the VM given by vm, to the FILE stream
* given by stream.
*/
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
struct userspace_mem_region *region;
struct vcpu *vcpu;
fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
for (region = vm->userspace_mem_region_head; region;
region = region->next) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size,
region->host_mem);
fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
sparsebit_dump(stream, region->unused_phy_pages, 0);
}
fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
fprintf(stream, "%*spgd_created: %u\n", indent, "",
vm->pgd_created);
if (vm->pgd_created) {
fprintf(stream, "%*sVirtual Translation Tables:\n",
indent + 2, "");
virt_dump(stream, vm, indent + 4);
}
fprintf(stream, "%*sVCPUs:\n", indent, "");
for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
vcpu_dump(stream, vm, vcpu->id, indent + 2);
}
/* VM VCPU Dump
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
* indent - Left margin indent amount
*
* Output Args:
* stream - Output FILE stream
*
* Return: None
*
* Dumps the current state of the VCPU specified by vcpuid, within the VM
* given by vm, to the FILE stream given by stream.
*/
void vcpu_dump(FILE *stream, struct kvm_vm *vm,
uint32_t vcpuid, uint8_t indent)
{
struct kvm_regs regs;
struct kvm_sregs sregs;
fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
fprintf(stream, "%*sregs:\n", indent + 2, "");
vcpu_regs_get(vm, vcpuid, &regs);
regs_dump(stream, &regs, indent + 4);
fprintf(stream, "%*ssregs:\n", indent + 2, "");
vcpu_sregs_get(vm, vcpuid, &sregs);
sregs_dump(stream, &sregs, indent + 4);
}
/* Known KVM exit reasons */
static struct exit_reason {
unsigned int reason;
const char *name;
} exit_reasons_known[] = {
{KVM_EXIT_UNKNOWN, "UNKNOWN"},
{KVM_EXIT_EXCEPTION, "EXCEPTION"},
{KVM_EXIT_IO, "IO"},
{KVM_EXIT_HYPERCALL, "HYPERCALL"},
{KVM_EXIT_DEBUG, "DEBUG"},
{KVM_EXIT_HLT, "HLT"},
{KVM_EXIT_MMIO, "MMIO"},
{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
{KVM_EXIT_INTR, "INTR"},
{KVM_EXIT_SET_TPR, "SET_TPR"},
{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
{KVM_EXIT_S390_RESET, "S390_RESET"},
{KVM_EXIT_DCR, "DCR"},
{KVM_EXIT_NMI, "NMI"},
{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
{KVM_EXIT_OSI, "OSI"},
{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
};
/* Exit Reason String
*
* Input Args:
* exit_reason - Exit reason
*
* Output Args: None
*
* Return:
* Constant string pointer describing the exit reason.
*
* Locates and returns a constant string that describes the KVM exit
* reason given by exit_reason. If no such string is found, a constant
* string of "Unknown" is returned.
*/
const char *exit_reason_str(unsigned int exit_reason)
{
unsigned int n1;
for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
if (exit_reason == exit_reasons_known[n1].reason)
return exit_reasons_known[n1].name;
}
return "Unknown";
}
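/* Example usage (an illustrative sketch): a typical test run loop that
 * executes a VCPU once and reports the resulting exit reason.  Expecting
 * KVM_EXIT_IO is hypothetical and depends on what the guest code does.
 *
 *	struct kvm_run *run = vcpu_state(vm, 0);
 *
 *	vcpu_run(vm, 0);
 *	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 *		    "Unexpected exit reason: %u (%s)",
 *		    run->exit_reason, exit_reason_str(run->exit_reason));
 */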
/* Physical Page Allocate
*
* Input Args:
* vm - Virtual Machine
* paddr_min - Physical address minimum
* memslot - Memory region to allocate page from
*
* Output Args: None
*
* Return:
* Starting physical address
*
* Within the VM specified by vm, locates an available physical page
* at or above paddr_min. If found, the page is marked as in use
* and its address is returned. A TEST_ASSERT failure occurs if no
* page is available at or above paddr_min.
*/
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
vm_paddr_t paddr_min, uint32_t memslot)
{
struct userspace_mem_region *region;
sparsebit_idx_t pg;
TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
"not divisable by page size.\n"
" paddr_min: 0x%lx page_size: 0x%x",
paddr_min, vm->page_size);
/* Locate memory region. */
region = memslot2region(vm, memslot);
/* Locate next available physical page at or above paddr_min. */
pg = paddr_min >> vm->page_shift;
if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
pg = sparsebit_next_set(region->unused_phy_pages, pg);
if (pg == 0) {
fprintf(stderr, "No guest physical page available, "
"paddr_min: 0x%lx page_size: 0x%x memslot: %u",
paddr_min, vm->page_size, memslot);
fputs("---- vm dump ----\n", stderr);
vm_dump(stderr, vm, 2);
abort();
}
}
/* Specify page as in use and return its address. */
sparsebit_clear(region->unused_phy_pages, pg);
return pg * vm->page_size;
}
/* Address Guest Virtual to Host Virtual
*
* Input Args:
* vm - Virtual Machine
* gva - VM virtual address
*
* Output Args: None
*
* Return:
* Equivalent host virtual address
*/
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}
/*
 * tools/testing/selftests/kvm/lib/kvm_util_internal.h
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/
#ifndef KVM_UTIL_INTERNAL_H
#define KVM_UTIL_INTERNAL_H 1
#include "sparsebit.h"
#ifndef BITS_PER_BYTE
#define BITS_PER_BYTE 8
#endif
#ifndef BITS_PER_LONG
#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
#endif
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
/* Concrete definition of struct kvm_vm. */
struct userspace_mem_region {
struct userspace_mem_region *next, *prev;
struct kvm_userspace_memory_region region;
struct sparsebit *unused_phy_pages;
int fd;
off_t offset;
void *host_mem;
void *mmap_start;
size_t mmap_size;
};
struct vcpu {
struct vcpu *next, *prev;
uint32_t id;
int fd;
struct kvm_run *state;
};
struct kvm_vm {
int mode;
int fd;
unsigned int page_size;
unsigned int page_shift;
uint64_t max_gfn;
struct vcpu *vcpu_head;
struct userspace_mem_region *userspace_mem_region_head;
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool pgd_created;
vm_paddr_t pgd;
};
struct vcpu *vcpu_find(struct kvm_vm *vm,
uint32_t vcpuid);
void vcpu_setup(struct kvm_vm *vm, int vcpuid);
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
void regs_dump(FILE *stream, struct kvm_regs *regs,
uint8_t indent);
void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
uint8_t indent);
#endif
/*
* Sparse bit array
*
* Copyright (C) 2018, Google LLC.
* Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
* This library provides functions to support a memory efficient bit array,
 * with an index size of 2^64.  A sparsebit array is allocated through
 * the use of sparsebit_alloc() and free'd via sparsebit_free(),
* such as in the following:
*
* struct sparsebit *s;
* s = sparsebit_alloc();
* sparsebit_free(&s);
*
 * Note that sparsebit_free() takes a pointer to the caller's
 * struct sparsebit pointer.  This is so that sparsebit_free() is able
 * to poison that pointer (e.g. set it to NULL) before returning to
 * the caller.
*
* Between the return of sparsebit_alloc() and the call of
* sparsebit_free(), there are multiple query and modifying operations
* that can be performed on the allocated sparsebit array. All of
* these operations take as a parameter the value returned from
* sparsebit_alloc() and most also take a bit index. Frequently
* used routines include:
*
* ---- Query Operations
* sparsebit_is_set(s, idx)
* sparsebit_is_clear(s, idx)
* sparsebit_any_set(s)
* sparsebit_first_set(s)
* sparsebit_next_set(s, prev_idx)
*
* ---- Modifying Operations
* sparsebit_set(s, idx)
* sparsebit_clear(s, idx)
* sparsebit_set_num(s, idx, num);
* sparsebit_clear_num(s, idx, num);
*
 * A common operation is to iterate over all the bits set in a test
* sparsebit array. This can be done via code with the following structure:
*
* sparsebit_idx_t idx;
* if (sparsebit_any_set(s)) {
* idx = sparsebit_first_set(s);
* do {
* ...
* idx = sparsebit_next_set(s, idx);
* } while (idx != 0);
* }
*
 * The index of the first bit set needs to be obtained via
 * sparsebit_first_set(), because sparsebit_next_set() needs
 * the index of the previously set bit.  The sparsebit_idx_t type is
 * unsigned, so there is no previous index before 0 that is available.
 * Also, the call to sparsebit_first_set() is not made unless there
 * is at least 1 bit in the array set.  This is because
 * sparsebit_first_set() aborts if it is called with no bits set.
 * It is the caller's responsibility to assure that the
 * sparsebit array has at least a single bit set before calling
 * sparsebit_first_set().
*
* ==== Implementation Overview ====
* For the most part the internal implementation of sparsebit is
* opaque to the caller. One important implementation detail that the
* caller may need to be aware of is the spatial complexity of the
 * implementation.  This implementation of a sparsebit array is not
 * only sparse, in that it uses memory proportional to the number of bits
 * set; it is also efficient in memory usage when most of the bits are
 * set.
*
* At a high-level the state of the bit settings are maintained through
* the use of a binary-search tree, where each node contains at least
* the following members:
*
* typedef uint64_t sparsebit_idx_t;
* typedef uint64_t sparsebit_num_t;
*
* sparsebit_idx_t idx;
* uint32_t mask;
* sparsebit_num_t num_after;
*
* The idx member contains the bit index of the first bit described by this
* node, while the mask member stores the setting of the first 32-bits.
* The setting of the bit at idx + n, where 0 <= n < 32, is located in the
* mask member at 1 << n.
*
* Nodes are sorted by idx and the bits described by two nodes will never
* overlap. The idx member is always aligned to the mask size, i.e. a
* multiple of 32.
*
* Beyond a typical implementation, the nodes in this implementation also
* contains a member named num_after. The num_after member holds the
* number of bits immediately after the mask bits that are contiguously set.
* The use of the num_after member allows this implementation to efficiently
* represent cases where most bits are set. For example, the case of all
* but the last two bits set, is represented by the following two nodes:
*
* node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
* node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
*
* ==== Invariants ====
 * This implementation uses the following invariants:
*
 * + Nodes are only used to represent bits that are set.
* Nodes with a mask of 0 and num_after of 0 are not allowed.
*
* + Sum of bits set in all the nodes is equal to the value of
 *   the struct sparsebit num_set member.
*
* + The setting of at least one bit is always described in a nodes
* mask (mask >= 1).
*
 * + A node with all mask bits set only occurs when the last bit
 *   described by the previous node is not equal to this node's
 *   starting index - 1.  All such occurrences of this condition are
 *   avoided by moving the setting of the node's mask bits into
 *   the previous node's num_after setting.
*
 * + Node starting index is evenly divisible by the number of bits
 *   within a node's mask member.
*
* + Nodes never represent a range of bits that wrap around the
* highest supported index.
*
 *   (idx + MASK_BITS + num_after - 1) <= (((sparsebit_idx_t) 0) - 1)
*
* As a consequence of the above, the num_after member of a node
* will always be <=:
*
* maximum_index - nodes_starting_index - number_of_mask_bits
*
 * + Nodes within the binary search tree are sorted based on each
 *   node's starting index.
*
* + The range of bits described by any two nodes do not overlap. The
* range of bits described by a single node is:
*
* start: node->idx
* end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
*
* Note, at times these invariants are temporarily violated for a
* specific portion of the code. For example, when setting a mask
* bit, there is a small delay between when the mask bit is set and the
 * value in the struct sparsebit num_set member is updated.  Other
* temporary violations occur when node_split() is called with a specified
* index and assures that a node where its mask represents the bit
* at the specified index exists. At times to do this node_split()
* must split an existing node into two nodes or create a node that
* has no bits set. Such temporary violations must be corrected before
* returning to the caller. These corrections are typically performed
* by the local function node_reduce().
*/
#include "test_util.h"
#include "sparsebit.h"
#include <limits.h>
#include <assert.h>
#define DUMP_LINE_MAX 100 /* Does not include indent amount */
typedef uint32_t mask_t;
#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
struct node {
struct node *parent;
struct node *left;
struct node *right;
sparsebit_idx_t idx; /* index of least-significant bit in mask */
sparsebit_num_t num_after; /* num contiguously set after mask */
mask_t mask;
};
struct sparsebit {
/*
* Points to root node of the binary search
* tree. Equal to NULL when no bits are set in
* the entire sparsebit array.
*/
struct node *root;
/*
* A redundant count of the total number of bits set. Used for
* diagnostic purposes and to change the time complexity of
* sparsebit_num_set() from O(n) to O(1).
* Note: Due to overflow, a value of 0 means none or all set.
*/
sparsebit_num_t num_set;
};
/* Returns the number of set bits described by the settings
* of the node pointed to by nodep.
*/
static sparsebit_num_t node_num_set(struct node *nodep)
{
return nodep->num_after + __builtin_popcount(nodep->mask);
}
/* Returns a pointer to the node that describes the
* lowest bit index.
*/
static struct node *node_first(struct sparsebit *s)
{
struct node *nodep;
for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
;
return nodep;
}
/* Returns a pointer to the node that describes the
* lowest bit index > the index of the node pointed to by np.
* Returns NULL if no node with a higher index exists.
*/
static struct node *node_next(struct sparsebit *s, struct node *np)
{
struct node *nodep = np;
/*
* If current node has a right child, next node is the left-most
* of the right child.
*/
if (nodep->right) {
for (nodep = nodep->right; nodep->left; nodep = nodep->left)
;
return nodep;
}
/*
* No right child. Go up until node is left child of a parent.
* That parent is then the next node.
*/
while (nodep->parent && nodep == nodep->parent->right)
nodep = nodep->parent;
return nodep->parent;
}
/* Searches for and returns a pointer to the node that describes the
* highest index < the index of the node pointed to by np.
* Returns NULL if no node with a lower index exists.
*/
static struct node *node_prev(struct sparsebit *s, struct node *np)
{
struct node *nodep = np;
/*
	 * If current node has a left child, previous node is the right-most
* of the left child.
*/
if (nodep->left) {
for (nodep = nodep->left; nodep->right; nodep = nodep->right)
;
return (struct node *) nodep;
}
/*
* No left child. Go up until node is right child of a parent.
	 * That parent is then the previous node.
*/
while (nodep->parent && nodep == nodep->parent->left)
nodep = nodep->parent;
return (struct node *) nodep->parent;
}
/* Allocates space to hold a copy of the node sub-tree pointed to by
* subtree and duplicates the bit settings to the newly allocated nodes.
* Returns the newly allocated copy of subtree.
*/
static struct node *node_copy_subtree(struct node *subtree)
{
struct node *root;
/* Duplicate the node at the root of the subtree */
root = calloc(1, sizeof(*root));
if (!root) {
perror("calloc");
abort();
}
root->idx = subtree->idx;
root->mask = subtree->mask;
root->num_after = subtree->num_after;
/* As needed, recursively duplicate the left and right subtrees */
if (subtree->left) {
root->left = node_copy_subtree(subtree->left);
root->left->parent = root;
}
if (subtree->right) {
root->right = node_copy_subtree(subtree->right);
root->right->parent = root;
}
return root;
}
/* Searches for and returns a pointer to the node that describes the setting
* of the bit given by idx. A node describes the setting of a bit if its
* index is within the bits described by the mask bits or the number of
* contiguous bits set after the mask. Returns NULL if there is no such node.
*/
static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
/* Find the node that describes the setting of the bit at idx */
for (nodep = s->root; nodep;
nodep = nodep->idx > idx ? nodep->left : nodep->right) {
if (idx >= nodep->idx &&
idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
break;
}
return nodep;
}
/* Entry Requirements:
* + A node that describes the setting of idx is not already present.
*
* Adds a new node to describe the setting of the bit at the index given
* by idx. Returns a pointer to the newly added node.
*
 * TODO(lhuemill): Degenerate cases cause the tree to get unbalanced.
*/
static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep, *parentp, *prev;
/* Allocate and initialize the new node. */
nodep = calloc(1, sizeof(*nodep));
if (!nodep) {
perror("calloc");
abort();
}
nodep->idx = idx & -MASK_BITS;
/* If no nodes, set it up as the root node. */
if (!s->root) {
s->root = nodep;
return nodep;
}
/*
* Find the parent where the new node should be attached
* and add the node there.
*/
parentp = s->root;
while (true) {
if (idx < parentp->idx) {
if (!parentp->left) {
parentp->left = nodep;
nodep->parent = parentp;
break;
}
parentp = parentp->left;
} else {
assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
if (!parentp->right) {
parentp->right = nodep;
nodep->parent = parentp;
break;
}
parentp = parentp->right;
}
}
/*
	 * Do the num_after bits of the previous node overlap with the mask
	 * of the new node?  If so, set the bits in the new node's mask
	 * and reduce the previous node's num_after.
*/
prev = node_prev(s, nodep);
while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
- nodep->idx;
assert(prev->num_after > 0);
assert(n1 < MASK_BITS);
assert(!(nodep->mask & (1 << n1)));
nodep->mask |= (1 << n1);
prev->num_after--;
}
return nodep;
}
/* Returns whether all the bits in the sparsebit array are set. */
bool sparsebit_all_set(struct sparsebit *s)
{
/*
	 * If any nodes exist, there must be at least one bit set.  The only
	 * case where a bit is set and the total num set is 0, is when all
	 * bits are set.
*/
return s->root && s->num_set == 0;
}
/* Clears all bits described by the node pointed to by nodep, then
* removes the node.
*/
static void node_rm(struct sparsebit *s, struct node *nodep)
{
struct node *tmp;
sparsebit_num_t num_set;
num_set = node_num_set(nodep);
assert(s->num_set >= num_set || sparsebit_all_set(s));
s->num_set -= node_num_set(nodep);
/* Have both left and right child */
if (nodep->left && nodep->right) {
/*
* Move left children to the leftmost leaf node
* of the right child.
*/
for (tmp = nodep->right; tmp->left; tmp = tmp->left)
;
tmp->left = nodep->left;
nodep->left = NULL;
tmp->left->parent = tmp;
}
/* Left only child */
if (nodep->left) {
if (!nodep->parent) {
s->root = nodep->left;
nodep->left->parent = NULL;
} else {
nodep->left->parent = nodep->parent;
if (nodep == nodep->parent->left)
nodep->parent->left = nodep->left;
else {
assert(nodep == nodep->parent->right);
nodep->parent->right = nodep->left;
}
}
nodep->parent = nodep->left = nodep->right = NULL;
free(nodep);
return;
}
/* Right only child */
if (nodep->right) {
if (!nodep->parent) {
s->root = nodep->right;
nodep->right->parent = NULL;
} else {
nodep->right->parent = nodep->parent;
if (nodep == nodep->parent->left)
nodep->parent->left = nodep->right;
else {
assert(nodep == nodep->parent->right);
nodep->parent->right = nodep->right;
}
}
nodep->parent = nodep->left = nodep->right = NULL;
free(nodep);
return;
}
/* Leaf Node */
if (!nodep->parent) {
s->root = NULL;
} else {
if (nodep->parent->left == nodep)
nodep->parent->left = NULL;
else {
assert(nodep == nodep->parent->right);
nodep->parent->right = NULL;
}
}
nodep->parent = nodep->left = nodep->right = NULL;
free(nodep);
return;
}
/* Splits the node containing the bit at idx so that there is a node
* that starts at the specified index. If no such node exists, a new
* node at the specified index is created. Returns the new node.
*
 * idx must be on a mask boundary.
*/
static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep1, *nodep2;
sparsebit_idx_t offset;
sparsebit_num_t orig_num_after;
assert(!(idx % MASK_BITS));
/*
* Is there a node that describes the setting of idx?
* If not, add it.
*/
nodep1 = node_find(s, idx);
if (!nodep1)
return node_add(s, idx);
/*
* All done if the starting index of the node is where the
* split should occur.
*/
if (nodep1->idx == idx)
return nodep1;
/*
* Split point not at start of mask, so it must be part of
* bits described by num_after.
*/
/*
* Calculate offset within num_after for where the split is
* to occur.
*/
offset = idx - (nodep1->idx + MASK_BITS);
orig_num_after = nodep1->num_after;
/*
* Add a new node to describe the bits starting at
* the split point.
*/
nodep1->num_after = offset;
nodep2 = node_add(s, idx);
/* Move bits after the split point into the new node */
nodep2->num_after = orig_num_after - offset;
if (nodep2->num_after >= MASK_BITS) {
nodep2->mask = ~(mask_t) 0;
nodep2->num_after -= MASK_BITS;
} else {
nodep2->mask = (1 << nodep2->num_after) - 1;
nodep2->num_after = 0;
}
return nodep2;
}
/* Iteratively reduces the node pointed to by nodep and its adjacent
* nodes into a more compact form. For example, a node with a mask with
* all bits set adjacent to a previous node, will get combined into a
* single node with an increased num_after setting.
*
* After each reduction, a further check is made to see if additional
* reductions are possible with the new previous and next nodes. Note,
* a search for a reduction is only done across the nodes nearest nodep
* and those that became part of a reduction. Reductions beyond nodep
* and the adjacent nodes that are reduced are not discovered. It is the
* responsibility of the caller to pass a nodep that is within one node
* of each possible reduction.
*
* This function does not fix the temporary violation of all invariants.
* For example it does not fix the case where the bit settings described
* by two or more nodes overlap. Such a violation introduces the potential
* complication of a bit setting for a specific index having different settings
* in different nodes. This would then introduce the further complication
* of which node has the correct setting of the bit and thus such conditions
* are not allowed.
*
* This function is designed to fix invariant violations that are introduced
 * by node_split() and by changes to a node's mask or num_after members.
 * For example, when setting a bit within a node's mask, the function that
* sets the bit doesn't have to worry about whether the setting of that
* bit caused the mask to have leading only or trailing only bits set.
* Instead, the function can call node_reduce(), with nodep equal to the
* node address that it set a mask bit in, and node_reduce() will notice
* the cases of leading or trailing only bits and that there is an
* adjacent node that the bit settings could be merged into.
*
* This implementation specifically detects and corrects violation of the
* following invariants:
*
 * + Nodes are only used to represent bits that are set.
* Nodes with a mask of 0 and num_after of 0 are not allowed.
*
* + The setting of at least one bit is always described in a nodes
* mask (mask >= 1).
*
 * + A node with all mask bits set only occurs when the last bit
 *   described by the previous node is not equal to this node's
 *   starting index - 1.  All such occurrences of this condition are
 *   avoided by moving the setting of the node's mask bits into
 *   the previous node's num_after setting.
*/
static void node_reduce(struct sparsebit *s, struct node *nodep)
{
bool reduction_performed;
do {
reduction_performed = false;
struct node *prev, *next, *tmp;
/* 1) Potential reductions within the current node. */
/* Nodes with all bits cleared may be removed. */
if (nodep->mask == 0 && nodep->num_after == 0) {
/*
* About to remove the node pointed to by
* nodep, which normally would cause a problem
* for the next pass through the reduction loop,
* because the node at the starting point no longer
* exists. This potential problem is handled
* by first remembering the location of the next
* or previous nodes. Doesn't matter which, because
* once the node at nodep is removed, there will be
* no other nodes between prev and next.
*
			 * Note, the checks performed on nodep against
			 * both prev and next check for an adjacent
* node that can be reduced into a single node. As
* such, after removing the node at nodep, doesn't
* matter whether the nodep for the next pass
* through the loop is equal to the previous pass
* prev or next node. Either way, on the next pass
* the one not selected will become either the
* prev or next node.
*/
tmp = node_next(s, nodep);
if (!tmp)
tmp = node_prev(s, nodep);
node_rm(s, nodep);
nodep = NULL;
nodep = tmp;
reduction_performed = true;
continue;
}
/*
* When the mask is 0, can reduce the amount of num_after
* bits by moving the initial num_after bits into the mask.
*/
if (nodep->mask == 0) {
assert(nodep->num_after != 0);
assert(nodep->idx + MASK_BITS > nodep->idx);
nodep->idx += MASK_BITS;
if (nodep->num_after >= MASK_BITS) {
nodep->mask = ~0;
nodep->num_after -= MASK_BITS;
} else {
nodep->mask = (1u << nodep->num_after) - 1;
nodep->num_after = 0;
}
reduction_performed = true;
continue;
}
/*
* 2) Potential reductions between the current and
* previous nodes.
*/
prev = node_prev(s, nodep);
if (prev) {
sparsebit_idx_t prev_highest_bit;
/* Nodes with no bits set can be removed. */
if (prev->mask == 0 && prev->num_after == 0) {
node_rm(s, prev);
reduction_performed = true;
continue;
}
/*
* All mask bits set and previous node has
* adjacent index.
*/
if (nodep->mask + 1 == 0 &&
prev->idx + MASK_BITS == nodep->idx) {
prev->num_after += MASK_BITS + nodep->num_after;
nodep->mask = 0;
nodep->num_after = 0;
reduction_performed = true;
continue;
}
/*
* Is node adjacent to previous node and the node
* contains a single contiguous range of bits
* starting from the beginning of the mask?
*/
prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
if (prev_highest_bit + 1 == nodep->idx &&
(nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
/*
* How many contiguous bits are there?
* Is equal to the total number of set
* bits, due to an earlier check that
* there is a single contiguous range of
* set bits.
*/
unsigned int num_contiguous
= __builtin_popcount(nodep->mask);
assert((num_contiguous > 0) &&
((1ULL << num_contiguous) - 1) == nodep->mask);
prev->num_after += num_contiguous;
nodep->mask = 0;
/*
* For predictable performance, handle special
* case where all mask bits are set and there
* is a non-zero num_after setting. This code
* is functionally correct without the following
* conditionalized statements, but without them
* the value of num_after is only reduced by
* the number of mask bits per pass. There are
* cases where num_after can be close to 2^64.
* Without this code it could take nearly
* (2^64) / 32 passes to perform the full
* reduction.
*/
if (num_contiguous == MASK_BITS) {
prev->num_after += nodep->num_after;
nodep->num_after = 0;
}
reduction_performed = true;
continue;
}
}
/*
* 3) Potential reductions between the current and
* next nodes.
*/
next = node_next(s, nodep);
if (next) {
/* Nodes with no bits set can be removed. */
if (next->mask == 0 && next->num_after == 0) {
node_rm(s, next);
reduction_performed = true;
continue;
}
/*
* Is next node index adjacent to current node
* and has a mask with all bits set?
*/
if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
next->mask == ~(mask_t) 0) {
nodep->num_after += MASK_BITS;
next->mask = 0;
nodep->num_after += next->num_after;
next->num_after = 0;
node_rm(s, next);
next = NULL;
reduction_performed = true;
continue;
}
}
} while (nodep && reduction_performed);
}
/* Returns whether the bit at the index given by idx, within the
* sparsebit array is set or not.
*/
bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
/* Find the node that describes the setting of the bit at idx */
for (nodep = s->root; nodep;
nodep = nodep->idx > idx ? nodep->left : nodep->right)
if (idx >= nodep->idx &&
idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
goto have_node;
return false;
have_node:
/* Bit is set if it is any of the bits described by num_after */
if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
return true;
/* Is the corresponding mask bit set */
assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
return !!(nodep->mask & (1 << (idx - nodep->idx)));
}
/* Within the sparsebit array pointed to by s, sets the bit
* at the index given by idx.
*/
static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
/* Skip bits that are already set */
if (sparsebit_is_set(s, idx))
return;
/*
* Get a node where the bit at idx is described by the mask.
* The node_split will also create a node, if there isn't
* already a node that describes the setting of bit.
*/
nodep = node_split(s, idx & -MASK_BITS);
/* Set the bit within the nodes mask */
assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
assert(!(nodep->mask & (1 << (idx - nodep->idx))));
nodep->mask |= 1 << (idx - nodep->idx);
s->num_set++;
node_reduce(s, nodep);
}
/* Within the sparsebit array pointed to by s, clears the bit
* at the index given by idx.
*/
static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
/* Skip bits that are already cleared */
if (!sparsebit_is_set(s, idx))
return;
/* Is there a node that describes the setting of this bit? */
nodep = node_find(s, idx);
if (!nodep)
return;
/*
* If a num_after bit, split the node, so that the bit is
* part of a node mask.
*/
if (idx >= nodep->idx + MASK_BITS)
nodep = node_split(s, idx & -MASK_BITS);
/*
* After node_split above, bit at idx should be within the mask.
* Clear that bit.
*/
assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
assert(nodep->mask & (1 << (idx - nodep->idx)));
nodep->mask &= ~(1 << (idx - nodep->idx));
assert(s->num_set > 0 || sparsebit_all_set(s));
s->num_set--;
node_reduce(s, nodep);
}
/* Recursively dumps to the FILE stream given by stream the contents
* of the sub-tree of nodes pointed to by nodep. Each line of output
* is prefixed by the number of spaces given by indent. On each
* recursion, the indent amount is increased by 2. This causes nodes
* at each level deeper into the binary search tree to be displayed
* with a greater indent.
*/
static void dump_nodes(FILE *stream, struct node *nodep,
unsigned int indent)
{
char *node_type;
/* Dump contents of node */
if (!nodep->parent)
node_type = "root";
else if (nodep == nodep->parent->left)
node_type = "left";
else {
assert(nodep == nodep->parent->right);
node_type = "right";
}
fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
nodep->parent, nodep->left, nodep->right);
fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
indent, "", nodep->idx, nodep->mask, nodep->num_after);
/* If present, dump contents of left child nodes */
if (nodep->left)
dump_nodes(stream, nodep->left, indent + 2);
/* If present, dump contents of right child nodes */
if (nodep->right)
dump_nodes(stream, nodep->right, indent + 2);
}
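/* Returns the index of the first set bit described by the mask of the node
 * pointed to by nodep, at or after the mask bit position given by start.
 * Assumes such a bit exists.
 */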
static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
{
mask_t leading = (mask_t)1 << start;
int n1 = __builtin_ctz(nodep->mask & -leading);
return nodep->idx + n1;
}
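/* Returns the index of the first cleared bit described by the mask of the
 * node pointed to by nodep, at or after the mask bit position given by start.
 * Assumes such a bit exists.
 */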
static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
{
mask_t leading = (mask_t)1 << start;
int n1 = __builtin_ctz(~nodep->mask & -leading);
return nodep->idx + n1;
}
/* Dumps to the FILE stream specified by stream, the implementation dependent
* internal state of s. Each line of output is prefixed with the number
* of spaces given by indent. The output is completely implementation
* dependent and subject to change. Output from this function should only
* be used for diagnostic purposes. For example, this function can be
* used by test cases after they detect an unexpected condition, as a means
* to capture diagnostic information.
*/
static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
unsigned int indent)
{
/* Dump the contents of s */
fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
if (s->root)
dump_nodes(stream, s->root, indent);
}
/* Allocates and returns a new sparsebit array. The initial state
* of the newly allocated sparsebit array has all bits cleared.
*/
struct sparsebit *sparsebit_alloc(void)
{
struct sparsebit *s;
/* Allocate top level structure. */
s = calloc(1, sizeof(*s));
if (!s) {
perror("calloc");
abort();
}
return s;
}
/* Frees the implementation dependent data for the sparsebit array
* pointed to by s and poisons the pointer to that data.
*/
void sparsebit_free(struct sparsebit **sbitp)
{
struct sparsebit *s = *sbitp;
if (!s)
return;
sparsebit_clear_all(s);
free(s);
*sbitp = NULL;
}
/* Makes a copy of the sparsebit array given by s, to the sparsebit
* array given by d. Note, d must have already been allocated via
 * sparsebit_alloc().  It may already have bits set, which will all
 * be cleared before the bits from s are copied in.
*/
void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
{
/* First clear any bits already set in the destination */
sparsebit_clear_all(d);
if (s->root) {
d->root = node_copy_subtree(s->root);
d->num_set = s->num_set;
}
}
/* Returns whether num consecutive bits starting at idx are all set. */
bool sparsebit_is_set_num(struct sparsebit *s,
sparsebit_idx_t idx, sparsebit_num_t num)
{
sparsebit_idx_t next_cleared;
assert(num > 0);
assert(idx + num - 1 >= idx);
/* With num > 0, the first bit must be set. */
if (!sparsebit_is_set(s, idx))
return false;
/* Find the next cleared bit */
next_cleared = sparsebit_next_clear(s, idx);
/*
* If no cleared bits beyond idx, then there are at least num
* set bits. idx + num doesn't wrap. Otherwise check if
* there are enough set bits between idx and the next cleared bit.
*/
return next_cleared == 0 || next_cleared - idx >= num;
}
/* Returns whether the bit at the index given by idx is cleared. */
bool sparsebit_is_clear(struct sparsebit *s,
sparsebit_idx_t idx)
{
return !sparsebit_is_set(s, idx);
}
/* Returns whether num consecutive bits starting at idx are all cleared. */
bool sparsebit_is_clear_num(struct sparsebit *s,
sparsebit_idx_t idx, sparsebit_num_t num)
{
sparsebit_idx_t next_set;
assert(num > 0);
assert(idx + num - 1 >= idx);
/* With num > 0, the first bit must be cleared. */
if (!sparsebit_is_clear(s, idx))
return false;
/* Find the next set bit */
next_set = sparsebit_next_set(s, idx);
/*
* If no set bits beyond idx, then there are at least num
* cleared bits. idx + num doesn't wrap. Otherwise check if
* there are enough cleared bits between idx and the next set bit.
*/
return next_set == 0 || next_set - idx >= num;
}
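/* Example usage (an illustrative sketch): track a pool of 0x10 free pages
 * starting at page index 0x1000 and claim one of them, mirroring how
 * kvm_util.c uses a sparsebit array for unused_phy_pages.
 *
 *	struct sparsebit *free_pages = sparsebit_alloc();
 *
 *	sparsebit_set_num(free_pages, 0x1000, 0x10);
 *	assert(sparsebit_is_set_num(free_pages, 0x1000, 0x10));
 *	sparsebit_clear(free_pages, 0x1000);
 *	assert(sparsebit_is_clear(free_pages, 0x1000));
 *	sparsebit_free(&free_pages);
 */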
/* Returns the total number of bits set. Note: 0 is also returned for
* the case of all bits set. This is because with all bits set, there
* is 1 additional bit set beyond what can be represented in the return
* value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
* to determine if the sparsebit array has any bits set.
*/
sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
{
return s->num_set;
}
/* Returns whether any bit is set in the sparsebit array. */
bool sparsebit_any_set(struct sparsebit *s)
{
/*
* Nodes only describe set bits. If any nodes then there
* is at least 1 bit set.
*/
if (!s->root)
return false;
/*
* Every node should have a non-zero mask. For now will
* just assure that the root node has a non-zero mask,
* which is a quick check that at least 1 bit is set.
*/
assert(s->root->mask != 0);
assert(s->num_set > 0 ||
(s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
s->root->mask == ~(mask_t) 0));
return true;
}
/* Returns whether all the bits in the sparsebit array are cleared. */
bool sparsebit_all_clear(struct sparsebit *s)
{
return !sparsebit_any_set(s);
}
/* Returns whether any bit is cleared in the sparsebit array. */
bool sparsebit_any_clear(struct sparsebit *s)
{
return !sparsebit_all_set(s);
}
/* Returns the index of the first set bit. Abort if no bits are set.
*/
sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
{
struct node *nodep;
/* Validate at least 1 bit is set */
assert(sparsebit_any_set(s));
nodep = node_first(s);
return node_first_set(nodep, 0);
}
/* Returns the index of the first cleared bit. Abort if
* no bits are cleared.
*/
sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
{
struct node *nodep1, *nodep2;
/* Validate at least 1 bit is cleared. */
assert(sparsebit_any_clear(s));
/* If no nodes or first node index > 0 then lowest cleared is 0 */
nodep1 = node_first(s);
if (!nodep1 || nodep1->idx > 0)
return 0;
	/* Does the mask in the first node contain any cleared bits? */
if (nodep1->mask != ~(mask_t) 0)
return node_first_clear(nodep1, 0);
/*
* All mask bits set in first node. If there isn't a second node
* then the first cleared bit is the first bit after the bits
* described by the first node.
*/
nodep2 = node_next(s, nodep1);
if (!nodep2) {
/*
* No second node. First cleared bit is first bit beyond
* bits described by first node.
*/
assert(nodep1->mask == ~(mask_t) 0);
assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
return nodep1->idx + MASK_BITS + nodep1->num_after;
}
/*
* There is a second node.
* If it is not adjacent to the first node, then there is a gap
* of cleared bits between the nodes, and the first cleared bit
* is the first bit within the gap.
*/
if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
return nodep1->idx + MASK_BITS + nodep1->num_after;
/*
* Second node is adjacent to the first node.
* Because it is adjacent, its mask should be non-zero. If all
* its mask bits are set, then with it being adjacent, it should
* have had the mask bits moved into the num_after setting of the
* previous node.
*/
return node_first_clear(nodep2, 0);
}
/* Returns index of next bit set within s after the index given by prev.
* Returns 0 if there are no bits after prev that are set.
*/
sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
sparsebit_idx_t prev)
{
sparsebit_idx_t lowest_possible = prev + 1;
sparsebit_idx_t start;
struct node *nodep;
/* A bit after the highest index can't be set. */
if (lowest_possible == 0)
return 0;
/*
* Find the leftmost 'candidate' overlapping or to the right
* of lowest_possible.
*/
struct node *candidate = NULL;
/* True iff lowest_possible is within candidate */
bool contains = false;
/*
* Find node that describes setting of bit at lowest_possible.
* If such a node doesn't exist, find the node with the lowest
* starting index that is > lowest_possible.
*/
for (nodep = s->root; nodep;) {
if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
>= lowest_possible) {
candidate = nodep;
if (candidate->idx <= lowest_possible) {
contains = true;
break;
}
nodep = nodep->left;
} else {
nodep = nodep->right;
}
}
if (!candidate)
return 0;
assert(candidate->mask != 0);
/* Does the candidate node describe the setting of lowest_possible? */
if (!contains) {
/*
* Candidate doesn't describe setting of bit at lowest_possible.
* Candidate points to the first node with a starting index
* > lowest_possible.
*/
assert(candidate->idx > lowest_possible);
return node_first_set(candidate, 0);
}
/*
* Candidate describes setting of bit at lowest_possible.
* Note: although the node describes the setting of the bit
* at lowest_possible, it's possible that its setting and the
* setting of all later bits described by this node are 0.
* For now, just handle the cases where this node describes
* a bit at or after an index of lowest_possible that is set.
*/
start = lowest_possible - candidate->idx;
if (start < MASK_BITS && candidate->mask >= (1 << start))
return node_first_set(candidate, start);
if (candidate->num_after) {
sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
return lowest_possible < first_num_after_idx
? first_num_after_idx : lowest_possible;
}
/*
* Although candidate node describes setting of bit at
* the index of lowest_possible, all bits at that index and
* after it that are described by candidate are cleared.  With
* this, the next bit is the first bit in the next node, if
* such a node exists. If a next node doesn't exist, then
* there is no next set bit.
*/
candidate = node_next(s, candidate);
if (!candidate)
return 0;
return node_first_set(candidate, 0);
}
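/*
 * Usage sketch (illustrative only): visit every set bit.  A return
 * value of 0 means "none", and the walk starts strictly after the
 * given index, so bit 0 has to be checked separately.  visit() is a
 * hypothetical callback, not something defined in this file.
 *
 *	sparsebit_idx_t i;
 *
 *	if (sparsebit_is_set(s, 0))
 *		visit(0);
 *	for (i = sparsebit_next_set(s, 0); i; i = sparsebit_next_set(s, i))
 *		visit(i);
 */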
/* Returns index of next bit cleared within s after the index given by prev.
* Returns 0 if there are no bits after prev that are cleared.
*/
sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
sparsebit_idx_t prev)
{
sparsebit_idx_t lowest_possible = prev + 1;
sparsebit_idx_t idx;
struct node *nodep1, *nodep2;
/* There are no bits after the highest index. */
if (lowest_possible == 0)
return 0;
/*
* Does a node describing the setting of lowest_possible exist?
* If not, the bit at lowest_possible is cleared.
*/
nodep1 = node_find(s, lowest_possible);
if (!nodep1)
return lowest_possible;
/* Does a mask bit in node 1 describe the next cleared bit. */
for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
if (!(nodep1->mask & (1 << idx)))
return nodep1->idx + idx;
/*
* Next cleared bit is not described by node 1.  If there
* isn't a next node, then the next cleared bit is the first
* bit after the bits described by the first node.
*/
nodep2 = node_next(s, nodep1);
if (!nodep2)
return nodep1->idx + MASK_BITS + nodep1->num_after;
/*
* There is a second node.
* If it is not adjacent to the first node, then there is a gap
* of cleared bits between the nodes, and the next cleared bit
* is the first bit within the gap.
*/
if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
return nodep1->idx + MASK_BITS + nodep1->num_after;
/*
* Second node is adjacent to the first node.
* Because it is adjacent, its mask should be non-zero. If all
* its mask bits are set, then with it being adjacent, it should
* have had the mask bits moved into the num_after setting of the
* previous node.
*/
return node_first_clear(nodep2, 0);
}
/* Starting with the index 1 greater than the index given by start, finds
* and returns the index of the first sequence of num consecutively set
* bits.  Returns a value of 0 if no such sequence exists.
*/
sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
sparsebit_idx_t idx;
assert(num >= 1);
for (idx = sparsebit_next_set(s, start);
idx != 0 && idx + num - 1 >= idx;
idx = sparsebit_next_set(s, idx)) {
assert(sparsebit_is_set(s, idx));
/*
* Does the sequence of bits starting at idx consist of
* num set bits?
*/
if (sparsebit_is_set_num(s, idx, num))
return idx;
/*
* Sequence of set bits at idx isn't large enough.
* Skip this entire sequence of set bits.
*/
idx = sparsebit_next_clear(s, idx);
if (idx == 0)
return 0;
}
return 0;
}
/* Starting with the index 1 greater than the index given by start, finds
* and returns the index of the first sequence of num consecutively cleared
* bits.  Returns a value of 0 if no such sequence exists.
*/
sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
sparsebit_idx_t idx;
assert(num >= 1);
for (idx = sparsebit_next_clear(s, start);
idx != 0 && idx + num - 1 >= idx;
idx = sparsebit_next_clear(s, idx)) {
assert(sparsebit_is_clear(s, idx));
/*
* Does the sequence of bits starting at idx consist of
* num cleared bits?
*/
if (sparsebit_is_clear_num(s, idx, num))
return idx;
/*
* Sequence of cleared bits at idx isn't large enough.
* Skip this entire sequence of cleared bits.
*/
idx = sparsebit_next_set(s, idx);
if (idx == 0)
return 0;
}
return 0;
}
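/*
 * Usage sketch for the range searches (illustrative only; the
 * allocator framing is an assumption, not a description of any caller
 * in this testsuite): treat set bits as "in use" and claim a run of
 * 16 free indices strictly after index 0x100.  A return of 0 means no
 * such run exists.
 *
 *	sparsebit_idx_t idx = sparsebit_next_clear_num(s, 0x100, 16);
 *
 *	if (idx != 0)
 *		sparsebit_set_num(s, idx, 16);
 */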
/* Sets the bits in the inclusive range start through start + num - 1. */
void sparsebit_set_num(struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
struct node *nodep, *next;
unsigned int n1;
sparsebit_idx_t idx;
sparsebit_num_t n;
sparsebit_idx_t middle_start, middle_end;
assert(num > 0);
assert(start + num - 1 >= start);
/*
* Leading - bits before first mask boundary.
*
* TODO(lhuemill): With some effort it may be possible to
* replace the following loop with a sequential sequence
* of statements. High level sequence would be:
*
* 1. Use node_split() to force node that describes setting
* of idx to be within the mask portion of a node.
* 2. Form mask of bits to be set.
* 3. Determine number of mask bits already set in the node
* and store in a local variable named num_already_set.
* 4. Set the appropriate mask bits within the node.
* 5. Increment struct sparsebit_pvt num_set member
* by the number of bits that were actually set.
* Exclude from the counts bits that were already set.
* 6. Before returning to the caller, use node_reduce() to
* handle the multiple corner cases that this method
* introduces.
*/
for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
bit_set(s, idx);
/* Middle - bits spanning one or more entire masks */
middle_start = idx;
middle_end = middle_start + (n & -MASK_BITS) - 1;
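/*
 * (n & -MASK_BITS) rounds n down to a whole number of masks.  For
 * example, assuming MASK_BITS were 32 and n were 70, the middle would
 * span 64 bits and the remaining 6 bits would be handled by the
 * trailing loop at the end of this function.
 */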
if (n >= MASK_BITS) {
nodep = node_split(s, middle_start);
/*
* As needed, split just after end of middle bits.
* No split needed if end of middle bits is at highest
* supported bit index.
*/
if (middle_end + 1 > middle_end)
(void) node_split(s, middle_end + 1);
/* Delete nodes that only describe bits within the middle. */
for (next = node_next(s, nodep);
next && (next->idx < middle_end);
next = node_next(s, nodep)) {
assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
node_rm(s, next);
next = NULL;
}
/* As needed set each of the mask bits */
for (n1 = 0; n1 < MASK_BITS; n1++) {
if (!(nodep->mask & (1 << n1))) {
nodep->mask |= 1 << n1;
s->num_set++;
}
}
s->num_set -= nodep->num_after;
nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
s->num_set += nodep->num_after;
node_reduce(s, nodep);
}
idx = middle_end + 1;
n -= middle_end - middle_start + 1;
/* Trailing - bits at and beyond last mask boundary */
assert(n < MASK_BITS);
for (; n > 0; idx++, n--)
bit_set(s, idx);
}
/* Clears the bits in the inclusive range start through start + num - 1. */
void sparsebit_clear_num(struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
struct node *nodep, *next;
unsigned int n1;
sparsebit_idx_t idx;
sparsebit_num_t n;
sparsebit_idx_t middle_start, middle_end;
assert(num > 0);
assert(start + num - 1 >= start);
/* Leading - bits before first mask boundary */
for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
bit_clear(s, idx);
/* Middle - bits spanning one or more entire masks */
middle_start = idx;
middle_end = middle_start + (n & -MASK_BITS) - 1;
if (n >= MASK_BITS) {
nodep = node_split(s, middle_start);
/*
* As needed, split just after end of middle bits.
* No split needed if end of middle bits is at highest
* supported bit index.
*/
if (middle_end + 1 > middle_end)
(void) node_split(s, middle_end + 1);
/* Delete nodes that only describe bits within the middle. */
for (next = node_next(s, nodep);
next && (next->idx < middle_end);
next = node_next(s, nodep)) {
assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
node_rm(s, next);
next = NULL;
}
/* As needed clear each of the mask bits */
for (n1 = 0; n1 < MASK_BITS; n1++) {
if (nodep->mask & (1 << n1)) {
nodep->mask &= ~(1 << n1);
s->num_set--;
}
}
/* Clear any bits described by num_after */
s->num_set -= nodep->num_after;
nodep->num_after = 0;
/*
* Delete the node that describes the beginning of
* the middle bits and perform any allowed reductions
* with the nodes prev or next of nodep.
*/
node_reduce(s, nodep);
nodep = NULL;
}
idx = middle_end + 1;
n -= middle_end - middle_start + 1;
/* Trailing - bits at and beyond last mask boundary */
assert(n < MASK_BITS);
for (; n > 0; idx++, n--)
bit_clear(s, idx);
}
/* Sets the bit at the index given by idx. */
void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
{
sparsebit_set_num(s, idx, 1);
}
/* Clears the bit at the index given by idx. */
void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
{
sparsebit_clear_num(s, idx, 1);
}
/* Sets the bits in the entire addressable range of the sparsebit array. */
void sparsebit_set_all(struct sparsebit *s)
{
sparsebit_set(s, 0);
sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
assert(sparsebit_all_set(s));
}
/* Clears the bits in the entire addressable range of the sparsebit array. */
void sparsebit_clear_all(struct sparsebit *s)
{
sparsebit_clear(s, 0);
sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
assert(!sparsebit_any_set(s));
}
static size_t display_range(FILE *stream, sparsebit_idx_t low,
sparsebit_idx_t high, bool prepend_comma_space)
{
char *fmt_str;
size_t sz;
/* Determine the printf format string */
if (low == high)
fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
else
fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
/*
* When stream is NULL, just determine the size of what would
* have been printed, else print the range.
*/
if (!stream)
sz = snprintf(NULL, 0, fmt_str, low, high);
else
sz = fprintf(stream, fmt_str, low, high);
return sz;
}
/* Dumps to the FILE stream given by stream, the bit settings
* of s. Each line of output is prefixed with the number of
* spaces given by indent. The length of each line is implementation
* dependent and does not depend on the indent amount. The following
* is an example output of a sparsebit array that has bits:
*
* 0x5, 0x8, 0xa:0xe, 0x12
*
* This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
* are set. Note that a ':', instead of a '-' is used to specify a range of
* contiguous bits. This is done because '-' is used to specify command-line
* options, and sometimes ranges are specified as command-line arguments.
*/
void sparsebit_dump(FILE *stream, struct sparsebit *s,
unsigned int indent)
{
size_t current_line_len = 0;
size_t sz;
struct node *nodep;
if (!sparsebit_any_set(s))
return;
/* Display initial indent */
fprintf(stream, "%*s", indent, "");
/* For each node */
for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
unsigned int n1;
sparsebit_idx_t low, high;
/* For each group of bits in the mask */
for (n1 = 0; n1 < MASK_BITS; n1++) {
if (nodep->mask & (1 << n1)) {
low = high = nodep->idx + n1;
for (; n1 < MASK_BITS; n1++) {
if (nodep->mask & (1 << n1))
high = nodep->idx + n1;
else
break;
}
if ((n1 == MASK_BITS) && nodep->num_after)
high += nodep->num_after;
/*
* How much room will it take to display
* this range.
*/
sz = display_range(NULL, low, high,
current_line_len != 0);
/*
* If there is not enough room, display
* a newline plus the indent of the next
* line.
*/
if (current_line_len + sz > DUMP_LINE_MAX) {
fputs("\n", stream);
fprintf(stream, "%*s", indent, "");
current_line_len = 0;
}
/* Display the range */
sz = display_range(stream, low, high,
current_line_len != 0);
current_line_len += sz;
}
}
/*
* If num_after is non-zero and the most-significant bit of
* mask is not set, then we still need to display a range for
* the bits described by num_after.
*/
if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
low = nodep->idx + MASK_BITS;
high = nodep->idx + MASK_BITS + nodep->num_after - 1;
/*
* How much room will it take to display
* this range.
*/
sz = display_range(NULL, low, high,
current_line_len != 0);
/*
* If there is not enough room, display
* a newline plus the indent of the next
* line.
*/
if (current_line_len + sz > DUMP_LINE_MAX) {
fputs("\n", stream);
fprintf(stream, "%*s", indent, "");
current_line_len = 0;
}
/* Display the range */
sz = display_range(stream, low, high,
current_line_len != 0);
current_line_len += sz;
}
}
fputs("\n", stream);
}
/* Validates the internal state of the sparsebit array given by
* s. On error, diagnostic information is printed to stderr and
* abort is called.
*/
void sparsebit_validate_internal(struct sparsebit *s)
{
bool error_detected = false;
struct node *nodep, *prev = NULL;
sparsebit_num_t total_bits_set = 0;
unsigned int n1;
/* For each node */
for (nodep = node_first(s); nodep;
prev = nodep, nodep = node_next(s, nodep)) {
/*
* Increase total bits set by the number of bits set
* in this node.
*/
for (n1 = 0; n1 < MASK_BITS; n1++)
if (nodep->mask & (1 << n1))
total_bits_set++;
total_bits_set += nodep->num_after;
/*
* Arbitrary choice as to whether a mask of 0 is allowed
* or not. For diagnostic purposes it is beneficial to
* have only one valid means to represent a set of bits.
* To support this an arbitrary choice has been made
* to not allow a mask of zero.
*/
if (nodep->mask == 0) {
fprintf(stderr, "Node mask of zero, "
"nodep: %p nodep->mask: 0x%x",
nodep, nodep->mask);
error_detected = true;
break;
}
/*
* Validate num_after is not greater than the max index
* - the number of mask bits. The num_after member
* uses 0-based indexing and thus has no value that
* represents all bits set. This limitation is handled
* by requiring a non-zero mask. With a non-zero mask,
* MASK_BITS worth of bits are described by the mask,
* which makes the largest needed num_after equal to:
*
* (~(sparsebit_num_t) 0) - MASK_BITS + 1
*/
if (nodep->num_after
> (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
fprintf(stderr, "num_after too large, "
"nodep: %p nodep->num_after: 0x%lx",
nodep, nodep->num_after);
error_detected = true;
break;
}
/* Validate node index is divisible by the mask size */
if (nodep->idx % MASK_BITS) {
fprintf(stderr, "Node index not divisable by "
"mask size,\n"
" nodep: %p nodep->idx: 0x%lx "
"MASK_BITS: %lu\n",
nodep, nodep->idx, MASK_BITS);
error_detected = true;
break;
}
/*
* Validate bits described by node don't wrap beyond the
* highest supported index.
*/
if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
fprintf(stderr, "Bits described by node wrap "
"beyond highest supported index,\n"
" nodep: %p nodep->idx: 0x%lx\n"
" MASK_BITS: %lu nodep->num_after: 0x%lx",
nodep, nodep->idx, MASK_BITS, nodep->num_after);
error_detected = true;
break;
}
/* Check parent pointers. */
if (nodep->left) {
if (nodep->left->parent != nodep) {
fprintf(stderr, "Left child parent pointer "
"doesn't point to this node,\n"
" nodep: %p nodep->left: %p "
"nodep->left->parent: %p",
nodep, nodep->left,
nodep->left->parent);
error_detected = true;
break;
}
}
if (nodep->right) {
if (nodep->right->parent != nodep) {
fprintf(stderr, "Right child parent pointer "
"doesn't point to this node,\n"
" nodep: %p nodep->right: %p "
"nodep->right->parent: %p",
nodep, nodep->right,
nodep->right->parent);
error_detected = true;
break;
}
}
if (!nodep->parent) {
if (s->root != nodep) {
fprintf(stderr, "Unexpected root node, "
"s->root: %p nodep: %p",
s->root, nodep);
error_detected = true;
break;
}
}
if (prev) {
/*
* Is index of previous node before index of
* current node?
*/
if (prev->idx >= nodep->idx) {
fprintf(stderr, "Previous node index "
">= current node index,\n"
" prev: %p prev->idx: 0x%lx\n"
" nodep: %p nodep->idx: 0x%lx",
prev, prev->idx, nodep, nodep->idx);
error_detected = true;
break;
}
/*
* Nodes occur in ascending order, based on each
* node's starting index.
*/
if ((prev->idx + MASK_BITS + prev->num_after - 1)
>= nodep->idx) {
fprintf(stderr, "Previous node bit range "
"overlap with current node bit range,\n"
" prev: %p prev->idx: 0x%lx "
"prev->num_after: 0x%lx\n"
" nodep: %p nodep->idx: 0x%lx "
"nodep->num_after: 0x%lx\n"
" MASK_BITS: %lu",
prev, prev->idx, prev->num_after,
nodep, nodep->idx, nodep->num_after,
MASK_BITS);
error_detected = true;
break;
}
/*
* When the node has all mask bits set, it shouldn't
* be adjacent to the last bit described by the
* previous node.
*/
if (nodep->mask == ~(mask_t) 0 &&
prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
fprintf(stderr, "Current node has mask with "
"all bits set and is adjacent to the "
"previous node,\n"
" prev: %p prev->idx: 0x%lx "
"prev->num_after: 0x%lx\n"
" nodep: %p nodep->idx: 0x%lx "
"nodep->num_after: 0x%lx\n"
" MASK_BITS: %lu",
prev, prev->idx, prev->num_after,
nodep, nodep->idx, nodep->num_after,
MASK_BITS);
error_detected = true;
break;
}
}
}
if (!error_detected) {
/*
* Is sum of bits set in each node equal to the count
* of total bits set.
*/
if (s->num_set != total_bits_set) {
fprintf(stderr, "Number of bits set missmatch,\n"
" s->num_set: 0x%lx total_bits_set: 0x%lx",
s->num_set, total_bits_set);
error_detected = true;
}
}
if (error_detected) {
fputs(" dump_internal:\n", stderr);
sparsebit_dump_internal(stderr, s, 4);
abort();
}
}
#ifdef FUZZ
/* A simple but effective fuzzing driver. Look for bugs with the help
* of some invariants and of a trivial representation of sparsebit.
* Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
* afl-fuzz do the magic. :)
*/
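/*
 * Input format (as consumed by get8()/get64()/main() below): each
 * operation uses 17 bytes -- one opcode byte, of which only the low
 * nibble selects the operation, followed by two big-endian 64-bit
 * operands -- and the driver loops until stdin is exhausted.
 */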
#include <stdlib.h>
#include <assert.h>
struct range {
sparsebit_idx_t first, last;
bool set;
};
struct sparsebit *s;
struct range ranges[1000];
int num_ranges;
static bool get_value(sparsebit_idx_t idx)
{
int i;
for (i = num_ranges; --i >= 0; )
if (ranges[i].first <= idx && idx <= ranges[i].last)
return ranges[i].set;
return false;
}
static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
{
sparsebit_num_t num;
sparsebit_idx_t next;
if (first < last) {
num = last - first + 1;
} else {
num = first - last + 1;
first = last;
last = first + num - 1;
}
switch (code) {
case 0:
sparsebit_set(s, first);
assert(sparsebit_is_set(s, first));
assert(!sparsebit_is_clear(s, first));
assert(sparsebit_any_set(s));
assert(!sparsebit_all_clear(s));
if (get_value(first))
return;
if (num_ranges == 1000)
exit(0);
ranges[num_ranges++] = (struct range)
{ .first = first, .last = first, .set = true };
break;
case 1:
sparsebit_clear(s, first);
assert(!sparsebit_is_set(s, first));
assert(sparsebit_is_clear(s, first));
assert(sparsebit_any_clear(s));
assert(!sparsebit_all_set(s));
if (!get_value(first))
return;
if (num_ranges == 1000)
exit(0);
ranges[num_ranges++] = (struct range)
{ .first = first, .last = first, .set = false };
break;
case 2:
assert(sparsebit_is_set(s, first) == get_value(first));
assert(sparsebit_is_clear(s, first) == !get_value(first));
break;
case 3:
if (sparsebit_any_set(s))
assert(get_value(sparsebit_first_set(s)));
if (sparsebit_any_clear(s))
assert(!get_value(sparsebit_first_clear(s)));
sparsebit_set_all(s);
assert(!sparsebit_any_clear(s));
assert(sparsebit_all_set(s));
num_ranges = 0;
ranges[num_ranges++] = (struct range)
{ .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
break;
case 4:
if (sparsebit_any_set(s))
assert(get_value(sparsebit_first_set(s)));
if (sparsebit_any_clear(s))
assert(!get_value(sparsebit_first_clear(s)));
sparsebit_clear_all(s);
assert(!sparsebit_any_set(s));
assert(sparsebit_all_clear(s));
num_ranges = 0;
break;
case 5:
next = sparsebit_next_set(s, first);
assert(next == 0 || next > first);
assert(next == 0 || get_value(next));
break;
case 6:
next = sparsebit_next_clear(s, first);
assert(next == 0 || next > first);
assert(next == 0 || !get_value(next));
break;
case 7:
next = sparsebit_next_clear(s, first);
if (sparsebit_is_set_num(s, first, num)) {
assert(next == 0 || next > last);
if (first)
next = sparsebit_next_set(s, first - 1);
else if (sparsebit_any_set(s))
next = sparsebit_first_set(s);
else
return;
assert(next == first);
} else {
assert(sparsebit_is_clear(s, first) || next <= last);
}
break;
case 8:
next = sparsebit_next_set(s, first);
if (sparsebit_is_clear_num(s, first, num)) {
assert(next == 0 || next > last);
if (first)
next = sparsebit_next_clear(s, first - 1);
else if (sparsebit_any_clear(s))
next = sparsebit_first_clear(s);
else
return;
assert(next == first);
} else {
assert(sparsebit_is_set(s, first) || next <= last);
}
break;
case 9:
sparsebit_set_num(s, first, num);
assert(sparsebit_is_set_num(s, first, num));
assert(!sparsebit_is_clear_num(s, first, num));
assert(sparsebit_any_set(s));
assert(!sparsebit_all_clear(s));
if (num_ranges == 1000)
exit(0);
ranges[num_ranges++] = (struct range)
{ .first = first, .last = last, .set = true };
break;
case 10:
sparsebit_clear_num(s, first, num);
assert(!sparsebit_is_set_num(s, first, num));
assert(sparsebit_is_clear_num(s, first, num));
assert(sparsebit_any_clear(s));
assert(!sparsebit_all_set(s));
if (num_ranges == 1000)
exit(0);
ranges[num_ranges++] = (struct range)
{ .first = first, .last = last, .set = false };
break;
case 11:
sparsebit_validate_internal(s);
break;
default:
break;
}
}
unsigned char get8(void)
{
int ch;
ch = getchar();
if (ch == EOF)
exit(0);
return ch;
}
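/* Reads eight bytes from stdin and assembles them big-endian into a
 * 64-bit value; exits cleanly (via get8()) when the input runs out.
 */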
uint64_t get64(void)
{
uint64_t x;
x = get8();
x = (x << 8) | get8();
x = (x << 8) | get8();
x = (x << 8) | get8();
x = (x << 8) | get8();
x = (x << 8) | get8();
x = (x << 8) | get8();
return (x << 8) | get8();
}
int main(void)
{
s = sparsebit_alloc();
for (;;) {
uint8_t op = get8() & 0xf;
uint64_t first = get64();
uint64_t last = get64();
operate(op, first, last);
}
}
#endif
/*
* tools/testing/selftests/kvm/lib/x86.c
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/
#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "kvm_util_internal.h"
#include "x86.h"
/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
/* Virtual translation table structure declarations */
struct pageMapL4Entry {
uint64_t present:1;
uint64_t writable:1;
uint64_t user:1;
uint64_t write_through:1;
uint64_t cache_disable:1;
uint64_t accessed:1;
uint64_t ignored_06:1;
uint64_t page_size:1;
uint64_t ignored_11_08:4;
uint64_t address:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
struct pageDirectoryPointerEntry {
uint64_t present:1;
uint64_t writable:1;
uint64_t user:1;
uint64_t write_through:1;
uint64_t cache_disable:1;
uint64_t accessed:1;
uint64_t ignored_06:1;
uint64_t page_size:1;
uint64_t ignored_11_08:4;
uint64_t address:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
struct pageDirectoryEntry {
uint64_t present:1;
uint64_t writable:1;
uint64_t user:1;
uint64_t write_through:1;
uint64_t cache_disable:1;
uint64_t accessed:1;
uint64_t ignored_06:1;
uint64_t page_size:1;
uint64_t ignored_11_08:4;
uint64_t address:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
struct pageTableEntry {
uint64_t present:1;
uint64_t writable:1;
uint64_t user:1;
uint64_t write_through:1;
uint64_t cache_disable:1;
uint64_t accessed:1;
uint64_t dirty:1;
uint64_t reserved_07:1;
uint64_t global:1;
uint64_t ignored_11_09:3;
uint64_t address:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
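/*
 * Note: the bit-field layouts above mirror the standard x86-64 4-level
 * paging entry formats (PML4E, PDPTE, PDE and PTE): bit 0 is present,
 * bit 1 writable, bits 51:12 hold the physical page frame number and
 * bit 63 is execute-disable.
 */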
/* Register Dump
*
* Input Args:
* indent - Left margin indent amount
* regs - registers
*
* Output Args:
* stream - Output FILE stream
*
* Return: None
*
* Dumps the state of the registers given by regs, to the FILE stream
* given by stream.
*/
void regs_dump(FILE *stream, struct kvm_regs *regs,
uint8_t indent)
{
fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
"rcx: 0x%.16llx rdx: 0x%.16llx\n",
indent, "",
regs->rax, regs->rbx, regs->rcx, regs->rdx);
fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
"rsp: 0x%.16llx rbp: 0x%.16llx\n",
indent, "",
regs->rsi, regs->rdi, regs->rsp, regs->rbp);
fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
"r10: 0x%.16llx r11: 0x%.16llx\n",
indent, "",
regs->r8, regs->r9, regs->r10, regs->r11);
fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
"r14: 0x%.16llx r15: 0x%.16llx\n",
indent, "",
regs->r12, regs->r13, regs->r14, regs->r15);
fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
indent, "",
regs->rip, regs->rflags);
}
/* Segment Dump
*
* Input Args:
* indent - Left margin indent amount
* segment - KVM segment
*
* Output Args:
* stream - Output FILE stream
*
* Return: None
*
* Dumps the state of the KVM segment given by segment, to the FILE stream
* given by stream.
*/
static void segment_dump(FILE *stream, struct kvm_segment *segment,
uint8_t indent)
{
fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
"selector: 0x%.4x type: 0x%.2x\n",
indent, "", segment->base, segment->limit,
segment->selector, segment->type);
fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
indent, "", segment->present, segment->dpl,
segment->db, segment->s, segment->l);
fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
"unusable: 0x%.2x padding: 0x%.2x\n",
indent, "", segment->g, segment->avl,
segment->unusable, segment->padding);
}
/* dtable Dump
*
* Input Args:
* indent - Left margin indent amount
* dtable - KVM dtable
*
* Output Args:
* stream - Output FILE stream
*
* Return: None
*
* Dumps the state of the KVM dtable given by dtable, to the FILE stream
* given by stream.
*/
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
uint8_t indent)
{
fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
"padding: 0x%.4x 0x%.4x 0x%.4x\n",
indent, "", dtable->base, dtable->limit,
dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}
/* System Register Dump
*
* Input Args:
* indent - Left margin indent amount
* sregs - System registers
*
* Output Args:
* stream - Output FILE stream
*
* Return: None
*
* Dumps the state of the system registers given by sregs, to the FILE stream
* given by stream.
*/
void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
uint8_t indent)
{
unsigned int i;
fprintf(stream, "%*scs:\n", indent, "");
segment_dump(stream, &sregs->cs, indent + 2);
fprintf(stream, "%*sds:\n", indent, "");
segment_dump(stream, &sregs->ds, indent + 2);
fprintf(stream, "%*ses:\n", indent, "");
segment_dump(stream, &sregs->es, indent + 2);
fprintf(stream, "%*sfs:\n", indent, "");
segment_dump(stream, &sregs->fs, indent + 2);
fprintf(stream, "%*sgs:\n", indent, "");
segment_dump(stream, &sregs->gs, indent + 2);
fprintf(stream, "%*sss:\n", indent, "");
segment_dump(stream, &sregs->ss, indent + 2);
fprintf(stream, "%*str:\n", indent, "");
segment_dump(stream, &sregs->tr, indent + 2);
fprintf(stream, "%*sldt:\n", indent, "");
segment_dump(stream, &sregs->ldt, indent + 2);
fprintf(stream, "%*sgdt:\n", indent, "");
dtable_dump(stream, &sregs->gdt, indent + 2);
fprintf(stream, "%*sidt:\n", indent, "");
dtable_dump(stream, &sregs->idt, indent + 2);
fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
"cr3: 0x%.16llx cr4: 0x%.16llx\n",
indent, "",
sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
"apic_base: 0x%.16llx\n",
indent, "",
sregs->cr8, sregs->efer, sregs->apic_base);
fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
fprintf(stream, "%*s%.16llx\n", indent + 2, "",
sregs->interrupt_bitmap[i]);
}
}
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
int rc;
TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
/* If needed, create page map l4 table. */
if (!vm->pgd_created) {
vm_paddr_t paddr = vm_phy_page_alloc(vm,
KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
vm->pgd = paddr;
/* Set pointer to pgd tables in all the VCPUs that
* have already been created. Future VCPUs will have
* the value set as each one is created.
*/
for (struct vcpu *vcpu = vm->vcpu_head; vcpu;
vcpu = vcpu->next) {
struct kvm_sregs sregs;
/* Obtain the current system register settings */
vcpu_sregs_get(vm, vcpu->id, &sregs);
/* Set and store the pointer to the start of the
* pgd tables.
*/
sregs.cr3 = vm->pgd;
vcpu_sregs_set(vm, vcpu->id, &sregs);
}
vm->pgd_created = true;
}
}
/* VM Virtual Page Map
*
* Input Args:
* vm - Virtual Machine
* vaddr - VM Virtual Address
* paddr - VM Physical Address
* pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
* Return: None
*
* Within the VM given by vm, creates a virtual translation for the page
* starting at vaddr to the page starting at paddr.
*/
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint32_t pgd_memslot)
{
uint16_t index[4];
struct pageMapL4Entry *pml4e;
TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
TEST_ASSERT((vaddr % vm->page_size) == 0,
"Virtual address not on page boundary,\n"
" vaddr: 0x%lx vm->page_size: 0x%x",
vaddr, vm->page_size);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
TEST_ASSERT((paddr % vm->page_size) == 0,
"Physical address not on page boundary,\n"
" paddr: 0x%lx vm->page_size: 0x%x",
paddr, vm->page_size);
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
index[0] = (vaddr >> 12) & 0x1ffu;
index[1] = (vaddr >> 21) & 0x1ffu;
index[2] = (vaddr >> 30) & 0x1ffu;
index[3] = (vaddr >> 39) & 0x1ffu;
/* Allocate page directory pointer table if not present. */
pml4e = addr_gpa2hva(vm, vm->pgd);
if (!pml4e[index[3]].present) {
pml4e[index[3]].address = vm_phy_page_alloc(vm,
KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
>> vm->page_shift;
pml4e[index[3]].writable = true;
pml4e[index[3]].present = true;
}
/* Allocate page directory table if not present. */
struct pageDirectoryPointerEntry *pdpe;
pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
if (!pdpe[index[2]].present) {
pdpe[index[2]].address = vm_phy_page_alloc(vm,
KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
>> vm->page_shift;
pdpe[index[2]].writable = true;
pdpe[index[2]].present = true;
}
/* Allocate page table if not present. */
struct pageDirectoryEntry *pde;
pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
if (!pde[index[1]].present) {
pde[index[1]].address = vm_phy_page_alloc(vm,
KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
>> vm->page_shift;
pde[index[1]].writable = true;
pde[index[1]].present = true;
}
/* Fill in page table entry. */
struct pageTableEntry *pte;
pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
pte[index[0]].address = paddr >> vm->page_shift;
pte[index[0]].writable = true;
pte[index[0]].present = 1;
}
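/*
 * Usage sketch (illustrative only; it assumes vm was created with
 * vm_create() and that a memory region backs both addresses, with
 * memslot 0 available for page tables): identity map one page and
 * check that the translation round-trips.
 *
 *	virt_pgd_alloc(vm, 0);
 *	virt_pg_map(vm, 0x400000, 0x400000, 0);
 *	assert(addr_gva2gpa(vm, 0x400000) == 0x400000);
 */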
/* Virtual Translation Tables Dump
*
* Input Args:
* vm - Virtual Machine
* indent - Left margin indent amount
*
* Output Args:
* stream - Output FILE stream
*
* Return: None
*
* Dumps to the FILE stream given by stream, the contents of all the
* virtual translation tables for the VM given by vm.
*/
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
struct pageMapL4Entry *pml4e, *pml4e_start;
struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
struct pageDirectoryEntry *pde, *pde_start;
struct pageTableEntry *pte, *pte_start;
if (!vm->pgd_created)
return;
fprintf(stream, "%*s "
" no\n", indent, "");
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
vm->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
if (!pml4e->present)
continue;
fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
pml4e->writable, pml4e->execute_disable);
pdpe_start = addr_gpa2hva(vm, pml4e->address
* vm->page_size);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
if (!pdpe->present)
continue;
fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
"%u %u\n",
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
(uint64_t) pdpe->address, pdpe->writable,
pdpe->execute_disable);
pde_start = addr_gpa2hva(vm,
pdpe->address * vm->page_size);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
if (!pde->present)
continue;
fprintf(stream, "%*spde 0x%-3zx %p "
"0x%-12lx 0x%-10lx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
(uint64_t) pde->address, pde->writable,
pde->execute_disable);
pte_start = addr_gpa2hva(vm,
pde->address * vm->page_size);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
if (!pte->present)
continue;
fprintf(stream, "%*spte 0x%-3zx %p "
"0x%-12lx 0x%-10lx %u %u "
" %u 0x%-10lx\n",
indent, "",
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
(uint64_t) pte->address,
pte->writable,
pte->execute_disable,
pte->dirty,
((uint64_t) n1 << 27)
| ((uint64_t) n2 << 18)
| ((uint64_t) n3 << 9)
| ((uint64_t) n4));
}
}
}
}
}
/* Set Unusable Segment
*
* Input Args: None
*
* Output Args:
* segp - Pointer to segment register
*
* Return: None
*
* Sets the segment register pointed to by segp to an unusable state.
*/
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
memset(segp, 0, sizeof(*segp));
segp->unusable = true;
}
/* Set Long Mode Flat Kernel Code Segment
*
* Input Args:
* selector - selector value
*
* Output Args:
* segp - Pointer to KVM segment
*
* Return: None
*
* Sets up the KVM segment pointed to by segp, to be a code segment
* with the selector value given by selector.
*/
static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
struct kvm_segment *segp)
{
memset(segp, 0, sizeof(*segp));
segp->selector = selector;
segp->limit = 0xFFFFFFFFu;
segp->s = 0x1; /* kTypeCodeData */
segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
* | kFlagCodeReadable
*/
segp->g = true;
segp->l = true;
segp->present = 1;
}
/* Set Long Mode Flat Kernel Data Segment
*
* Input Args:
* selector - selector value
*
* Output Args:
* segp - Pointer to KVM segment
*
* Return: None
*
* Sets up the KVM segment pointed to by segp, to be a data segment
* with the selector value given by selector.
*/
static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
struct kvm_segment *segp)
{
memset(segp, 0, sizeof(*segp));
segp->selector = selector;
segp->limit = 0xFFFFFFFFu;
segp->s = 0x1; /* kTypeCodeData */
segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
* | kFlagDataWritable
*/
segp->g = true;
segp->present = true;
}
/* Address Guest Virtual to Guest Physical
*
* Input Args:
* vm - Virtual Machine
* gva - VM virtual address
*
* Output Args: None
*
* Return:
* Equivalent VM physical address
*
* Translates the VM virtual address given by gva to a VM physical
* address within the VM given by vm, by walking the VM's virtual
* translation tables.  A TEST_ASSERT failure occurs if no mapping
* for the VM virtual address exists.
*/
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint16_t index[4];
struct pageMapL4Entry *pml4e;
struct pageDirectoryPointerEntry *pdpe;
struct pageDirectoryEntry *pde;
struct pageTableEntry *pte;
void *hva;
TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
index[0] = (gva >> 12) & 0x1ffu;
index[1] = (gva >> 21) & 0x1ffu;
index[2] = (gva >> 30) & 0x1ffu;
index[3] = (gva >> 39) & 0x1ffu;
if (!vm->pgd_created)
goto unmapped_gva;
pml4e = addr_gpa2hva(vm, vm->pgd);
if (!pml4e[index[3]].present)
goto unmapped_gva;
pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
if (!pdpe[index[2]].present)
goto unmapped_gva;
pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
if (!pde[index[1]].present)
goto unmapped_gva;
pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
if (!pte[index[0]].present)
goto unmapped_gva;
return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
unmapped_gva:
TEST_ASSERT(false, "No mapping for vm virtual address, "
"gva: 0x%lx", gva);
}
void vcpu_setup(struct kvm_vm *vm, int vcpuid)
{
struct kvm_sregs sregs;
/* Set mode specific system register values. */
vcpu_sregs_get(vm, vcpuid, &sregs);
switch (vm->mode) {
case VM_MODE_FLAT48PG:
sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
sregs.cr4 |= X86_CR4_PAE;
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs);
kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds);
kvm_seg_set_kernel_data_64bit(0x10, &sregs.es);
break;
default:
TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
}
vcpu_sregs_set(vm, vcpuid, &sregs);
/* If the virtual translation tables have been set up, point the CR3
* system register at them.  It's okay if they haven't been set up yet:
* the code that sets up the virtual translation tables will go back
* through any VCPUs that have already been created and set their
* values.
*/
if (vm->pgd_created) {
struct kvm_sregs sregs;
vcpu_sregs_get(vm, vcpuid, &sregs);
sregs.cr3 = vm->pgd;
vcpu_sregs_set(vm, vcpuid, &sregs);
}
}
/* Adds a vCPU with reasonable defaults (i.e., a stack)
*
* Input Args:
* vcpuid - The id of the VCPU to add to the VM.
* guest_code - The vCPU's entry point
*/
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
struct kvm_mp_state mp_state;
struct kvm_regs regs;
vm_vaddr_t stack_vaddr;
stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
/* Create VCPU */
vm_vcpu_add(vm, vcpuid);
/* Setup guest general purpose registers */
vcpu_regs_get(vm, vcpuid, &regs);
regs.rflags = regs.rflags | 0x2;
regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
regs.rip = (unsigned long) guest_code;
vcpu_regs_set(vm, vcpuid, &regs);
/* Setup the MP state */
mp_state.mp_state = 0;
vcpu_set_mp_state(vm, vcpuid, &mp_state);
}
/* VM VCPU CPUID Set
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU id
* cpuid - The CPUID values to set.
*
* Output Args: None
*
* Return: void
*
* Set the VCPU's CPUID.
*/
void vcpu_set_cpuid(struct kvm_vm *vm,
uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int rc;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
rc, errno);
}
/* Create a VM with reasonable defaults
*
* Input Args:
* vcpuid - The id of the single VCPU to add to the VM.
* guest_code - The vCPU's entry point
*
* Output Args: None
*
* Return:
* Pointer to opaque structure that describes the created VM.
*/
struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
{
struct kvm_vm *vm;
/* Create VM */
vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
/* Setup IRQ Chip */
vm_create_irqchip(vm);
/* Add the first vCPU. */
vm_vcpu_add_default(vm, vcpuid, guest_code);
return vm;
}
/*
* KVM_SET_SREGS tests
*
* Copyright (C) 2018, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
* This is a regression test for the bug fixed by the following commit:
* d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
*
* That bug allowed a user-mode program that called the KVM_SET_SREGS
* ioctl to put a VCPU's local APIC into an invalid state.
*
*/
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include "test_util.h"
#include "kvm_util.h"
#include "x86.h"
#define VCPU_ID 5
int main(int argc, char *argv[])
{
struct kvm_sregs sregs;
struct kvm_vm *vm;
int rc;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
/* Create VM */
vm = vm_create_default(VCPU_ID, NULL);
vcpu_sregs_get(vm, VCPU_ID, &sregs);
sregs.apic_base = 1 << 10;
rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
sregs.apic_base);
sregs.apic_base = 1 << 11;
rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
sregs.apic_base);
kvm_vm_free(vm);
return 0;
}