Commit 69def9f0 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (202 commits)
  MAINTAINERS: update KVM entry
  KVM: correct error-handling code
  KVM: fix compile warnings on s390
  KVM: VMX: Check cpl before emulating debug register access
  KVM: fix misreporting of coalesced interrupts by kvm tracer
  KVM: x86: drop duplicate kvm_flush_remote_tlb calls
  KVM: VMX: call vmx_load_host_state() only if msr is cached
  KVM: VMX: Conditionally reload debug register 6
  KVM: Use thread debug register storage instead of kvm specific data
  KVM guest: do not batch pte updates from interrupt context
  KVM: Fix coalesced interrupt reporting in IOAPIC
  KVM guest: fix bogus wallclock physical address calculation
  KVM: VMX: Fix cr8 exiting control clobbering by EPT
  KVM: Optimize kvm_mmu_unprotect_page_virt() for tdp
  KVM: Document KVM_CAP_IRQCHIP
  KVM: Protect update_cr8_intercept() when running without an apic
  KVM: VMX: Fix EPT with WP bit change during paging
  KVM: Use kvm_{read,write}_guest_virt() to read and write segment descriptors
  KVM: x86 emulator: Add adc and sbb missing decoder flags
  KVM: Add missing #include
  ...
parents 353f6dd2 8e616fc8
......@@ -193,7 +193,7 @@ Code Seq# Include File Comments
0xAD 00 Netfilter device in development:
<mailto:rusty@rustcorp.com.au>
0xAE all linux/kvm.h Kernel-based Virtual Machine
<mailto:kvm-devel@lists.sourceforge.net>
<mailto:kvm@vger.kernel.org>
0xB0 all RATIO devices in development:
<mailto:vgo@ratio.de>
0xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca>
......
......@@ -57,6 +57,7 @@ parameter is applicable:
ISAPNP ISA PnP code is enabled.
ISDN Appropriate ISDN support is enabled.
JOY Appropriate joystick support is enabled.
KVM Kernel Virtual Machine support is enabled.
LIBATA Libata driver is enabled
LP Printer support is enabled.
LOOP Loopback device support is enabled.
......@@ -1098,6 +1099,44 @@ and is between 256 and 4096 characters. It is defined in the file
kstack=N [X86] Print N words from the kernel stack
in oops dumps.
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
Default is 0 (don't ignore, but inject #GP)
kvm.oos_shadow= [KVM] Disable out-of-sync shadow paging.
Default is 1 (enabled)
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
Default is 0 (off)
kvm-amd.npt= [KVM,AMD] Disable nested paging (virtualized MMU)
for all guests.
Default is 1 (enabled) if in 64bit or 32bit-PAE mode
kvm-intel.bypass_guest_pf=
[KVM,Intel] Disables bypassing of guest page faults
on Intel chips. Default is 1 (enabled)
kvm-intel.ept= [KVM,Intel] Disable extended page tables
(virtualized MMU) support on capable Intel chips.
Default is 1 (enabled)
kvm-intel.emulate_invalid_guest_state=
[KVM,Intel] Enable emulation of invalid guest states
Default is 0 (disabled)
kvm-intel.flexpriority=
[KVM,Intel] Disable FlexPriority feature (TPR shadow).
Default is 1 (enabled)
kvm-intel.unrestricted_guest=
[KVM,Intel] Disable unrestricted guest feature
(virtualized real and unpaged mode) on capable
Intel chips. Default is 1 (enabled)
kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
feature (tagged TLBs) on capable Intel chips.
Default is 1 (enabled)
l2cr= [PPC]
l3cr= [PPC]
......
This diff is collapsed.
......@@ -2926,6 +2926,7 @@ F: include/linux/sunrpc/
KERNEL VIRTUAL MACHINE (KVM)
M: Avi Kivity <avi@redhat.com>
M: Marcelo Tosatti <mtosatti@redhat.com>
L: kvm@vger.kernel.org
W: http://kvm.qumranet.com
S: Supported
......
......@@ -235,7 +235,8 @@ struct kvm_vm_data {
#define KVM_REQ_PTC_G 32
#define KVM_REQ_RESUME 33
#define KVM_PAGES_PER_HPAGE 1
#define KVM_NR_PAGE_SIZES 1
#define KVM_PAGES_PER_HPAGE(x) 1
struct kvm;
struct kvm_vcpu;
......@@ -465,7 +466,6 @@ struct kvm_arch {
unsigned long metaphysical_rr4;
unsigned long vmm_init_rr;
int online_vcpus;
int is_sn2;
struct kvm_ioapic *vioapic;
......
......@@ -19,9 +19,13 @@
*
*/
#ifdef __KERNEL__
static inline unsigned int kvm_arch_para_features(void)
{
return 0;
}
#endif
#endif
#
# KVM configuration
#
config HAVE_KVM
bool
config HAVE_KVM_IRQCHIP
bool
default y
source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
......@@ -28,6 +24,8 @@ config KVM
depends on PCI
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_IRQCHIP
select KVM_APIC_ARCHITECTURE
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
......@@ -49,9 +47,6 @@ config KVM_INTEL
Provides support for KVM on Itanium 2 processors equipped with the VT
extensions.
config KVM_TRACE
bool
source drivers/virtio/Kconfig
endif # VIRTUALIZATION
......@@ -210,16 +210,6 @@ int kvm_dev_ioctl_check_extension(long ext)
}
static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
gpa_t addr, int len, int is_write)
{
struct kvm_io_device *dev;
dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write);
return dev;
}
static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
......@@ -231,6 +221,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct kvm_mmio_req *p;
struct kvm_io_device *mmio_dev;
int r;
p = kvm_get_vcpu_ioreq(vcpu);
......@@ -247,16 +238,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->exit_reason = KVM_EXIT_MMIO;
return 0;
mmio:
mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir);
if (mmio_dev) {
if (!p->dir)
kvm_iodevice_write(mmio_dev, p->addr, p->size,
&p->data);
if (p->dir)
r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
p->size, &p->data);
else
kvm_iodevice_read(mmio_dev, p->addr, p->size,
&p->data);
} else
r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
p->size, &p->data);
if (r)
printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
p->state = STATE_IORESP_READY;
......@@ -337,13 +325,12 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
{
union ia64_lid lid;
int i;
struct kvm_vcpu *vcpu;
for (i = 0; i < kvm->arch.online_vcpus; i++) {
if (kvm->vcpus[i]) {
lid.val = VCPU_LID(kvm->vcpus[i]);
kvm_for_each_vcpu(i, vcpu, kvm) {
lid.val = VCPU_LID(vcpu);
if (lid.id == id && lid.eid == eid)
return kvm->vcpus[i];
}
return vcpu;
}
return NULL;
......@@ -409,21 +396,21 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
struct kvm *kvm = vcpu->kvm;
struct call_data call_data;
int i;
struct kvm_vcpu *vcpui;
call_data.ptc_g_data = p->u.ptc_g_data;
for (i = 0; i < kvm->arch.online_vcpus; i++) {
if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
KVM_MP_STATE_UNINITIALIZED ||
vcpu == kvm->vcpus[i])
kvm_for_each_vcpu(i, vcpui, kvm) {
if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
vcpu == vcpui)
continue;
if (waitqueue_active(&kvm->vcpus[i]->wq))
wake_up_interruptible(&kvm->vcpus[i]->wq);
if (waitqueue_active(&vcpui->wq))
wake_up_interruptible(&vcpui->wq);
if (kvm->vcpus[i]->cpu != -1) {
call_data.vcpu = kvm->vcpus[i];
smp_call_function_single(kvm->vcpus[i]->cpu,
if (vcpui->cpu != -1) {
call_data.vcpu = vcpui;
smp_call_function_single(vcpui->cpu,
vcpu_global_purge, &call_data, 1);
} else
printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
......@@ -852,8 +839,6 @@ struct kvm *kvm_arch_create_vm(void)
kvm_init_vm(kvm);
kvm->arch.online_vcpus = 0;
return kvm;
}
......@@ -1000,10 +985,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
if (irqchip_in_kernel(kvm)) {
__s32 status;
mutex_lock(&kvm->lock);
mutex_lock(&kvm->irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
mutex_unlock(&kvm->lock);
mutex_unlock(&kvm->irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status;
if (copy_to_user(argp, &irq_event,
......@@ -1216,7 +1201,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (IS_ERR(vmm_vcpu))
return PTR_ERR(vmm_vcpu);
if (vcpu->vcpu_id == 0) {
if (kvm_vcpu_is_bsp(vcpu)) {
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
/*Set entry address for first run.*/
......@@ -1224,7 +1209,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
/*Initialize itc offset for vcpus*/
itc_offset = 0UL - kvm_get_itc(vcpu);
for (i = 0; i < kvm->arch.online_vcpus; i++) {
for (i = 0; i < KVM_MAX_VCPUS; i++) {
v = (struct kvm_vcpu *)((char *)vcpu +
sizeof(struct kvm_vcpu_data) * i);
v->arch.itc_offset = itc_offset;
......@@ -1356,8 +1341,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
goto fail;
}
kvm->arch.online_vcpus++;
return vcpu;
fail:
return ERR_PTR(r);
......@@ -1952,19 +1935,6 @@ int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
return find_highest_bits((int *)&vpd->irr[0]);
}
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
{
if (kvm_highest_pending_irq(vcpu) != -1)
return 1;
return 0;
}
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
/* do real check here */
return 1;
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
return vcpu->arch.timer_fired;
......@@ -1977,7 +1947,8 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
(kvm_highest_pending_irq(vcpu) != -1);
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
......
......@@ -830,8 +830,8 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
kvm = (struct kvm *)KVM_VM_BASE;
if (vcpu->vcpu_id == 0) {
for (i = 0; i < kvm->arch.online_vcpus; i++) {
if (kvm_vcpu_is_bsp(vcpu)) {
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
v = (struct kvm_vcpu *)((char *)vcpu +
sizeof(struct kvm_vcpu_data) * i);
VMX(v, itc_offset) = itc_offset;
......
......@@ -34,7 +34,8 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
/* We don't currently support large pages. */
#define KVM_PAGES_PER_HPAGE (1UL << 31)
#define KVM_NR_PAGE_SIZES 1
#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
struct kvm;
struct kvm_run;
......@@ -153,7 +154,6 @@ struct kvm_vcpu_arch {
u32 pid;
u32 swap_pid;
u32 pvr;
u32 ccr0;
u32 ccr1;
u32 dbcr0;
......
......@@ -138,7 +138,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
}
static int kvmppc_44x_init(void)
static int __init kvmppc_44x_init(void)
{
int r;
......@@ -149,7 +149,7 @@ static int kvmppc_44x_init(void)
return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
}
static void kvmppc_44x_exit(void)
static void __exit kvmppc_44x_exit(void)
{
kvmppc_booke_exit();
}
......
......@@ -30,6 +30,7 @@
#include "timing.h"
#include "44x_tlb.h"
#include "trace.h"
#ifndef PPC44x_TLBE_SIZE
#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
......@@ -263,7 +264,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
/* XXX set tlb_44x_index to stlb_index? */
KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
trace_kvm_stlb_inval(stlb_index);
}
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
......@@ -365,8 +366,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
/* Insert shadow mapping into hardware TLB. */
kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
kvmppc_44x_tlbwe(victim, &stlbe);
KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
stlbe.word2, handler);
trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1,
stlbe.word2);
}
/* For a particular guest TLB entry, invalidate the corresponding host TLB
......@@ -485,8 +486,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
}
KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
tlbe->word1, tlbe->word2, handler);
trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1,
tlbe->word2);
kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
return EMULATE_DONE;
......
......@@ -2,8 +2,7 @@
# KVM configuration
#
config HAVE_KVM_IRQCHIP
bool
source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
......@@ -59,17 +58,6 @@ config KVM_E500
If unsure, say N.
config KVM_TRACE
bool "KVM trace support"
depends on KVM && MARKERS && SYSFS
select RELAY
select DEBUG_FS
default n
---help---
This option allows reading a trace of kvm-related events through
relayfs. Note the ABI is not considered stable and will be
modified in future updates.
source drivers/virtio/Kconfig
endif # VIRTUALIZATION
......@@ -8,7 +8,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
CFLAGS_44x_tlb.o := -I.
CFLAGS_e500_tlb.o := -I.
CFLAGS_emulate.o := -I.
kvm-objs := $(common-objs-y) powerpc.o emulate.o
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
......
......@@ -520,7 +520,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return kvmppc_core_vcpu_translate(vcpu, tr);
}
int kvmppc_booke_init(void)
int __init kvmppc_booke_init(void)
{
unsigned long ivor[16];
unsigned long max_ivor = 0;
......
......@@ -60,9 +60,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
kvmppc_e500_tlb_setup(vcpu_e500);
/* Use the same core vertion as host's */
vcpu->arch.pvr = mfspr(SPRN_PVR);
return 0;
}
......@@ -132,7 +129,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
static int kvmppc_e500_init(void)
static int __init kvmppc_e500_init(void)
{
int r, i;
unsigned long ivor[3];
......@@ -160,7 +157,7 @@ static int kvmppc_e500_init(void)
return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
}
static void kvmppc_e500_exit(void)
static void __init kvmppc_e500_exit(void)
{
kvmppc_booke_exit();
}
......
......@@ -180,6 +180,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_MMUCSR0:
vcpu->arch.gpr[rt] = 0; break;
case SPRN_MMUCFG:
vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break;
/* extra exceptions */
case SPRN_IVOR32:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
......
......@@ -22,6 +22,7 @@
#include "../mm/mmu_decl.h"
#include "e500_tlb.h"
#include "trace.h"
#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
......@@ -224,9 +225,8 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
stlbe->mas1 = 0;
KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
handler);
trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
stlbe->mas3, stlbe->mas7);
}
static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
......@@ -269,7 +269,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
tsized = (vcpu_e500->mas4 >> 8) & 0xf;
tsized = (vcpu_e500->mas4 >> 7) & 0x1f;
vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
......@@ -309,7 +309,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
stlbe->mas2 = (gvaddr & MAS2_EPN)
| e500_shadow_mas2_attrib(gtlbe->mas2,
......@@ -319,9 +319,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
vcpu_e500->vcpu.arch.msr & MSR_PR);
stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
handler);
trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
stlbe->mas3, stlbe->mas7);
}
/* XXX only map the one-one case, for now use TLB0 */
......@@ -535,9 +534,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
gtlbe->mas3 = vcpu_e500->mas3;
gtlbe->mas7 = vcpu_e500->mas7;
KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
handler);
trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
gtlbe->mas3, gtlbe->mas7);
/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
if (tlbe_is_host_safe(vcpu, gtlbe)) {
......@@ -545,7 +543,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
case 0:
/* TLB0 */
gtlbe->mas1 &= ~MAS1_TSIZE(~0);
gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
stlbsel = 0;
sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
......@@ -679,14 +677,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
/* Insert large initial mapping for guest. */
tlbe = &vcpu_e500->guest_tlb[1][0];
tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
tlbe->mas2 = 0;
tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
tlbe->mas7 = 0;
/* 4K map for serial output. Used by kernel wrapper. */
tlbe = &vcpu_e500->guest_tlb[1][1];
tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
tlbe->mas7 = 0;
......
......@@ -16,7 +16,7 @@
#define __KVM_E500_TLB_H__
#include <linux/kvm_host.h>
#include <asm/mmu-fsl-booke.h>
#include <asm/mmu-book3e.h>
#include <asm/tlb.h>
#include <asm/kvm_e500.h>
......@@ -59,7 +59,7 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
/* TLB helper functions */
static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
{
return (tlbe->mas1 >> 8) & 0xf;
return (tlbe->mas1 >> 7) & 0x1f;
}
static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
......@@ -70,7 +70,7 @@ static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
{
unsigned int pgsize = get_tlb_size(tlbe);
return 1ULL << 10 << (pgsize << 1);
return 1ULL << 10 << pgsize;
}
static inline gva_t get_tlb_end(const struct tlbe *tlbe)
......
......@@ -29,6 +29,7 @@
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
#include "timing.h"
#include "trace.h"
#define OP_TRAP 3
......@@ -187,7 +188,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_SRR1:
vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
case SPRN_PVR:
vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
vcpu->arch.gpr[rt] = mfspr(SPRN_PVR); break;
case SPRN_PIR:
vcpu->arch.gpr[rt] = mfspr(SPRN_PIR); break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
......@@ -417,7 +420,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
}
KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated);
if (advance)
vcpu->arch.pc += 4; /* Advance past emulated instruction. */
......
......@@ -31,25 +31,17 @@
#include "timing.h"
#include "../mm/mmu_decl.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
return gfn;
}
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
return !!(v->arch.pending_exceptions);
}
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
/* do real check here */
return 1;
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
return !(v->arch.msr & MSR_WE);
return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions);
}
......@@ -122,13 +114,17 @@ struct kvm *kvm_arch_create_vm(void)
static void kvmppc_free_vcpus(struct kvm *kvm)
{
unsigned int i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_arch_vcpu_free(kvm->vcpus[i]);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
kvm->vcpus[i] = NULL;
}
}
atomic_set(&kvm->online_vcpus, 0);
mutex_unlock(&kvm->lock);
}
void kvm_arch_sync_events(struct kvm *kvm)
......
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H
#include <linux/tracepoint.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace
/*
* Tracepoint for guest mode entry.
*/
TRACE_EVENT(kvm_ppc_instr,
TP_PROTO(unsigned int inst, unsigned long pc, unsigned int emulate),
TP_ARGS(inst, pc, emulate),
TP_STRUCT__entry(
__field( unsigned int, inst )
__field( unsigned long, pc )
__field( unsigned int, emulate )
),
TP_fast_assign(
__entry->inst = inst;
__entry->pc = pc;
__entry->emulate = emulate;
),
TP_printk("inst %u pc 0x%lx emulate %u\n",
__entry->inst, __entry->pc, __entry->emulate)
);
TRACE_EVENT(kvm_stlb_inval,
TP_PROTO(unsigned int stlb_index),
TP_ARGS(stlb_index),
TP_STRUCT__entry(
__field( unsigned int, stlb_index )
),
TP_fast_assign(
__entry->stlb_index = stlb_index;
),
TP_printk("stlb_index %u", __entry->stlb_index)
);
TRACE_EVENT(kvm_stlb_write,
TP_PROTO(unsigned int victim, unsigned int tid, unsigned int word0,
unsigned int word1, unsigned int word2),
TP_ARGS(victim, tid, word0, word1, word2),
TP_STRUCT__entry(
__field( unsigned int, victim )
__field( unsigned int, tid )
__field( unsigned int, word0 )
__field( unsigned int, word1 )
__field( unsigned int, word2 )
),
TP_fast_assign(
__entry->victim = victim;
__entry->tid = tid;
__entry->word0 = word0;
__entry->word1 = word1;
__entry->word2 = word2;
),
TP_printk("victim %u tid %u w0 %u w1 %u w2 %u",
__entry->victim, __entry->tid, __entry->word0,
__entry->word1, __entry->word2)
);
TRACE_EVENT(kvm_gtlb_write,
TP_PROTO(unsigned int gtlb_index, unsigned int tid, unsigned int word0,
unsigned int word1, unsigned int word2),
TP_ARGS(gtlb_index, tid, word0, word1, word2),
TP_STRUCT__entry(
__field( unsigned int, gtlb_index )
__field( unsigned int, tid )
__field( unsigned int, word0 )
__field( unsigned int, word1 )
__field( unsigned int, word2 )
),
TP_fast_assign(
__entry->gtlb_index = gtlb_index;
__entry->tid = tid;
__entry->word0 = word0;
__entry->word1 = word1;
__entry->word2 = word2;
),
TP_printk("gtlb_index %u tid %u w0 %u w1 %u w2 %u",
__entry->gtlb_index, __entry->tid, __entry->word0,
__entry->word1, __entry->word2)
);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
......@@ -15,15 +15,6 @@
*/
#include <linux/types.h>
/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
struct kvm_pic_state {
/* no PIC for s390 */
};
struct kvm_ioapic_state {
/* no IOAPIC for s390 */
};
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
......
/*
* asm-s390/kvm_host.h - definition for kernel virtual machines on s390
*
* Copyright IBM Corp. 2008
* Copyright IBM Corp. 2008,2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
......@@ -40,7 +40,11 @@ struct sca_block {
struct sca_entry cpu[64];
} __attribute__((packed));
#define KVM_PAGES_PER_HPAGE 256
#define KVM_NR_PAGE_SIZES 2
#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + ((x) - 1) * 8)
#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
#define CPUSTAT_HOST 0x80000000
#define CPUSTAT_WAIT 0x10000000
......@@ -182,8 +186,9 @@ struct kvm_s390_interrupt_info {
};
/* for local_interrupt.action_flags */
#define ACTION_STORE_ON_STOP 1
#define ACTION_STOP_ON_STOP 2
#define ACTION_STORE_ON_STOP (1<<0)
#define ACTION_STOP_ON_STOP (1<<1)
#define ACTION_RELOADVCPU_ON_STOP (1<<2)
struct kvm_s390_local_interrupt {
spinlock_t lock;
......@@ -227,8 +232,6 @@ struct kvm_vm_stat {
};
struct kvm_arch{
unsigned long guest_origin;
unsigned long guest_memsize;
struct sca_block *sca;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
......
......@@ -13,6 +13,8 @@
#ifndef __S390_KVM_PARA_H
#define __S390_KVM_PARA_H
#ifdef __KERNEL__
/*
* Hypercalls for KVM on s390. The calling convention is similar to the
* s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
......@@ -147,4 +149,6 @@ static inline unsigned int kvm_arch_para_features(void)
return 0;
}
#endif
#endif /* __S390_KVM_PARA_H */
#
# KVM configuration
#
config HAVE_KVM
bool
config HAVE_KVM_IRQCHIP
bool
source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
......@@ -38,9 +34,6 @@ config KVM
If unsure, say N.
config KVM_TRACE
bool
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/virtio/Kconfig
......
/*
* gaccess.h - access guest memory
*
* Copyright IBM Corp. 2008
* Copyright IBM Corp. 2008,2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
......@@ -16,13 +16,14 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/uaccess.h>
#include "kvm-s390.h"
static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
unsigned long guestaddr)
{
unsigned long prefix = vcpu->arch.sie_block->prefix;
unsigned long origin = vcpu->kvm->arch.guest_origin;
unsigned long memsize = vcpu->kvm->arch.guest_memsize;
unsigned long origin = vcpu->arch.sie_block->gmsor;
unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
if (guestaddr < 2 * PAGE_SIZE)
guestaddr += prefix;
......@@ -158,8 +159,8 @@ static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest,
const void *from, unsigned long n)
{
unsigned long prefix = vcpu->arch.sie_block->prefix;
unsigned long origin = vcpu->kvm->arch.guest_origin;
unsigned long memsize = vcpu->kvm->arch.guest_memsize;
unsigned long origin = vcpu->arch.sie_block->gmsor;
unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
goto slowpath;
......@@ -209,8 +210,8 @@ static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
unsigned long guestsrc, unsigned long n)
{
unsigned long prefix = vcpu->arch.sie_block->prefix;
unsigned long origin = vcpu->kvm->arch.guest_origin;
unsigned long memsize = vcpu->kvm->arch.guest_memsize;
unsigned long origin = vcpu->arch.sie_block->gmsor;
unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
goto slowpath;
......@@ -244,8 +245,8 @@ static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu,
unsigned long guestdest,
const void *from, unsigned long n)
{
unsigned long origin = vcpu->kvm->arch.guest_origin;
unsigned long memsize = vcpu->kvm->arch.guest_memsize;
unsigned long origin = vcpu->arch.sie_block->gmsor;
unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
if (guestdest + n > memsize)
return -EFAULT;
......@@ -262,8 +263,8 @@ static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
unsigned long guestsrc,
unsigned long n)
{
unsigned long origin = vcpu->kvm->arch.guest_origin;
unsigned long memsize = vcpu->kvm->arch.guest_memsize;
unsigned long origin = vcpu->arch.sie_block->gmsor;
unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
if (guestsrc + n > memsize)
return -EFAULT;
......
/*
* intercept.c - in-kernel handling for sie intercepts
*
* Copyright IBM Corp. 2008
* Copyright IBM Corp. 2008,2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
......@@ -128,7 +128,7 @@ static int handle_noop(struct kvm_vcpu *vcpu)
static int handle_stop(struct kvm_vcpu *vcpu)
{
int rc;
int rc = 0;
vcpu->stat.exit_stop_request++;
atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
......@@ -141,12 +141,18 @@ static int handle_stop(struct kvm_vcpu *vcpu)
rc = -ENOTSUPP;
}
if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
rc = SIE_INTERCEPT_RERUNVCPU;
vcpu->run->exit_reason = KVM_EXIT_INTR;
}
if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
rc = -ENOTSUPP;
} else
rc = 0;
}
spin_unlock_bh(&vcpu->arch.local_int.lock);
return rc;
}
......@@ -158,9 +164,9 @@ static int handle_validity(struct kvm_vcpu *vcpu)
vcpu->stat.exit_validity++;
if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
<= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
<= kvm_s390_vcpu_get_memsize(vcpu) - 2*PAGE_SIZE)) {
rc = fault_in_pages_writeable((char __user *)
vcpu->kvm->arch.guest_origin +
vcpu->arch.sie_block->gmsor +
vcpu->arch.sie_block->prefix,
2*PAGE_SIZE);
if (rc)
......
......@@ -283,7 +283,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
return 1;
}
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
......@@ -320,12 +320,6 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
return rc;
}
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
/* do real check here */
return 1;
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
return 0;
......
/*
* s390host.c -- hosting zSeries kernel virtual machines
*
* Copyright IBM Corp. 2008
* Copyright IBM Corp. 2008,2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
......@@ -10,6 +10,7 @@
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
* Heiko Carstens <heiko.carstens@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
*/
#include <linux/compiler.h>
......@@ -210,13 +211,17 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
static void kvm_free_vcpus(struct kvm *kvm)
{
unsigned int i;
struct kvm_vcpu *vcpu;
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_arch_vcpu_destroy(kvm->vcpus[i]);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_destroy(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
kvm->vcpus[i] = NULL;
}
}
atomic_set(&kvm->online_vcpus, 0);
mutex_unlock(&kvm->lock);
}
void kvm_arch_sync_events(struct kvm *kvm)
......@@ -278,16 +283,10 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gbea = 1;
}
/* The current code can have up to 256 pages for virtio */
#define VIRTIODESCSPACE (256ul * 4096ul)
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
vcpu->kvm->arch.guest_origin +
VIRTIODESCSPACE - 1ul;
vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests);
vcpu->arch.sie_block->ecb = 2;
vcpu->arch.sie_block->eca = 0xC1002001U;
vcpu->arch.sie_block->fac = (int) (long) facilities;
......@@ -319,8 +318,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
BUG_ON(!kvm->arch.sca);
if (!kvm->arch.sca->cpu[id].sda)
kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
else
BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
......@@ -490,9 +487,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu_load(vcpu);
rerun_vcpu:
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
kvm_s390_vcpu_set_mem(vcpu);
/* verify, that memory has been registered */
if (!vcpu->kvm->arch.guest_memsize) {
if (!vcpu->arch.sie_block->gmslm) {
vcpu_put(vcpu);
VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu");
return -EINVAL;
}
......@@ -509,6 +512,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
break;
case KVM_EXIT_UNKNOWN:
case KVM_EXIT_INTR:
case KVM_EXIT_S390_RESET:
break;
default:
......@@ -522,8 +526,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = kvm_handle_sie_intercept(vcpu);
} while (!signal_pending(current) && !rc);
if (signal_pending(current) && !rc)
if (rc == SIE_INTERCEPT_RERUNVCPU)
goto rerun_vcpu;
if (signal_pending(current) && !rc) {
kvm_run->exit_reason = KVM_EXIT_INTR;
rc = -EINTR;
}
if (rc == -ENOTSUPP) {
/* intercept cannot be handled in-kernel, prepare kvm-run */
......@@ -676,6 +685,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
int user_alloc)
{
int i;
struct kvm_vcpu *vcpu;
/* A few sanity checks. We can have exactly one memory slot which has
to start at guest virtual zero and which has to be located at a
......@@ -684,7 +694,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
vmas. It is okay to mmap() and munmap() stuff in this slot after
doing this call at any time */
if (mem->slot || kvm->arch.guest_memsize)
if (mem->slot)
return -EINVAL;
if (mem->guest_phys_addr)
......@@ -699,36 +709,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
if (!user_alloc)
return -EINVAL;
/* lock all vcpus */
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (!kvm->vcpus[i])
/* request update of sie control block for all available vcpus */
kvm_for_each_vcpu(i, vcpu, kvm) {
if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
continue;
if (!mutex_trylock(&kvm->vcpus[i]->mutex))
goto fail_out;
}
kvm->arch.guest_origin = mem->userspace_addr;
kvm->arch.guest_memsize = mem->memory_size;
/* update sie control blocks, and unlock all vcpus */
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm->vcpus[i]->arch.sie_block->gmsor =
kvm->arch.guest_origin;
kvm->vcpus[i]->arch.sie_block->gmslm =
kvm->arch.guest_memsize +
kvm->arch.guest_origin +
VIRTIODESCSPACE - 1ul;
mutex_unlock(&kvm->vcpus[i]->mutex);
}
kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
}
return 0;
fail_out:
for (; i >= 0; i--)
mutex_unlock(&kvm->vcpus[i]->mutex);
return -EINVAL;
}
void kvm_arch_flush_shadow(struct kvm *kvm)
......
/*
* kvm_s390.h - definition for kvm on s390
*
* Copyright IBM Corp. 2008
* Copyright IBM Corp. 2008,2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
......@@ -9,6 +9,7 @@
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
*/
#ifndef ARCH_S390_KVM_S390_H
......@@ -18,8 +19,13 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
/* The current code can have up to 256 pages for virtio */
#define VIRTIODESCSPACE (256ul * 4096ul)
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
/* negativ values are error codes, positive values for internal conditions */
#define SIE_INTERCEPT_RERUNVCPU (1<<0)
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
......@@ -50,6 +56,30 @@ int kvm_s390_inject_vm(struct kvm *kvm,
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt *s390int);
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
static inline int kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
{
return vcpu->arch.sie_block->gmslm
- vcpu->arch.sie_block->gmsor
- VIRTIODESCSPACE + 1ul;
}
static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
{
struct kvm_memory_slot *mem;
down_read(&vcpu->kvm->slots_lock);
mem = &vcpu->kvm->memslots[0];
vcpu->arch.sie_block->gmsor = mem->userspace_addr;
vcpu->arch.sie_block->gmslm =
mem->userspace_addr +
(mem->npages << PAGE_SHIFT) +
VIRTIODESCSPACE - 1ul;
up_read(&vcpu->kvm->slots_lock);
}
/* implemented in priv.c */
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
......
/*
* sigp.c - handlinge interprocessor communication
*
* Copyright IBM Corp. 2008
* Copyright IBM Corp. 2008,2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
......@@ -9,6 +9,7 @@
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
*/
#include <linux/kvm.h>
......@@ -107,46 +108,57 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
return rc;
}
static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
{
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
struct kvm_s390_interrupt_info *inti;
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
return 3; /* not operational */
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
return -ENOMEM;
inti->type = KVM_S390_SIGP_STOP;
spin_lock(&fi->lock);
li = fi->local_int[cpu_addr];
if (li == NULL) {
rc = 3; /* not operational */
kfree(inti);
goto unlock;
}
spin_lock_bh(&li->lock);
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
if (store)
li->action_bits |= ACTION_STORE_ON_STOP;
li->action_bits |= ACTION_STOP_ON_STOP;
li->action_bits |= action;
if (waitqueue_active(&li->wq))
wake_up_interruptible(&li->wq);
spin_unlock_bh(&li->lock);
rc = 0; /* order accepted */
return 0; /* order accepted */
}
static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
{
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
return 3; /* not operational */
spin_lock(&fi->lock);
li = fi->local_int[cpu_addr];
if (li == NULL) {
rc = 3; /* not operational */
goto unlock;
}
rc = __inject_sigp_stop(li, action);
unlock:
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
return rc;
}
int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
return __inject_sigp_stop(li, action);
}
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
{
int rc;
......@@ -177,9 +189,9 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
/* make sure that the new value is valid memory */
address = address & 0x7fffe000u;
if ((copy_from_guest(vcpu, &tmp,
(u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
(u64) (address + vcpu->arch.sie_block->gmsor) , 1)) ||
(copy_from_guest(vcpu, &tmp, (u64) (address +
vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) {
*reg |= SIGP_STAT_INVALID_PARAMETER;
return 1; /* invalid parameter */
}
......@@ -262,11 +274,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
break;
case SIGP_STOP:
vcpu->stat.instruction_sigp_stop++;
rc = __sigp_stop(vcpu, cpu_addr, 0);
rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
break;
case SIGP_STOP_STORE_STATUS:
vcpu->stat.instruction_sigp_stop++;
rc = __sigp_stop(vcpu, cpu_addr, 1);
rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP);
break;
case SIGP_SET_ARCH:
vcpu->stat.instruction_sigp_arch++;
......
......@@ -15,6 +15,7 @@
#define APIC_LVR 0x30
#define APIC_LVR_MASK 0xFF00FF
#define APIC_LVR_DIRECTED_EOI (1 << 24)
#define GET_APIC_VERSION(x) ((x) & 0xFFu)
#define GET_APIC_MAXLVT(x) (((x) >> 16) & 0xFFu)
#ifdef CONFIG_X86_32
......@@ -41,6 +42,7 @@
#define APIC_DFR_CLUSTER 0x0FFFFFFFul
#define APIC_DFR_FLAT 0xFFFFFFFFul
#define APIC_SPIV 0xF0
#define APIC_SPIV_DIRECTED_EOI (1 << 12)
#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
#define APIC_SPIV_APIC_ENABLED (1 << 8)
#define APIC_ISR 0x100
......
......@@ -17,6 +17,8 @@
#define __KVM_HAVE_USER_NMI
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
......@@ -236,6 +238,14 @@ struct kvm_pit_state {
struct kvm_pit_channel_state channels[3];
};
#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
struct kvm_pit_state2 {
struct kvm_pit_channel_state channels[3];
__u32 flags;
__u32 reserved[9];
};
struct kvm_reinject_control {
__u8 pit_reinject;
__u8 reserved[31];
......
......@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/tracepoint.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
......@@ -37,12 +38,14 @@
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
0xFFFFFF0000000000ULL)
#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
#define KVM_GUEST_CR0_MASK \
(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \
| X86_CR0_NW | X86_CR0_CD)
(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
(X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
#define KVM_VM_CR0_ALWAYS_ON \
(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \
| X86_CR0_MP)
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_GUEST_CR4_MASK \
(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
......@@ -51,12 +54,12 @@
#define INVALID_PAGE (~(hpa_t)0)
#define UNMAPPED_GVA (~(gpa_t)0)
/* shadow tables are PAE even on non-PAE hosts */
#define KVM_HPAGE_SHIFT 21
#define KVM_HPAGE_SIZE (1UL << KVM_HPAGE_SHIFT)
#define KVM_HPAGE_MASK (~(KVM_HPAGE_SIZE - 1))
#define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE)
/* KVM Hugepage definitions for x86 */
#define KVM_NR_PAGE_SIZES 3
#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9))
#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
#define DE_VECTOR 0
#define DB_VECTOR 1
......@@ -120,6 +123,10 @@ enum kvm_reg {
NR_VCPU_REGS
};
enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
};
enum {
VCPU_SREG_ES,
VCPU_SREG_CS,
......@@ -131,7 +138,7 @@ enum {
VCPU_SREG_LDTR,
};
#include <asm/kvm_x86_emulate.h>
#include <asm/kvm_emulate.h>
#define KVM_NR_MEM_OBJS 40
......@@ -308,7 +315,6 @@ struct kvm_vcpu_arch {
struct {
gfn_t gfn; /* presumed gfn during guest pte update */
pfn_t pfn; /* pfn corresponding to that gfn */
int largepage;
unsigned long mmu_seq;
} update_pte;
......@@ -334,16 +340,6 @@ struct kvm_vcpu_arch {
u8 nr;
} interrupt;
struct {
int vm86_active;
u8 save_iopl;
struct kvm_save_segment {
u16 selector;
unsigned long base;
u32 limit;
u32 ar;
} tr, es, ds, fs, gs;
} rmode;
int halt_request; /* real mode on Intel only */
int cpuid_nent;
......@@ -366,13 +362,15 @@ struct kvm_vcpu_arch {
u32 pat;
int switch_db_regs;
unsigned long host_db[KVM_NR_DB_REGS];
unsigned long host_dr6;
unsigned long host_dr7;
unsigned long db[KVM_NR_DB_REGS];
unsigned long dr6;
unsigned long dr7;
unsigned long eff_db[KVM_NR_DB_REGS];
u64 mcg_cap;
u64 mcg_status;
u64 mcg_ctl;
u64 *mce_banks;
};
struct kvm_mem_alias {
......@@ -409,6 +407,7 @@ struct kvm_arch{
struct page *ept_identity_pagetable;
bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
......@@ -526,6 +525,9 @@ struct kvm_x86_ops {
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
bool (*gb_page_enable)(void);
const struct trace_print_flags *exit_reasons_str;
};
extern struct kvm_x86_ops *kvm_x86_ops;
......@@ -618,6 +620,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
u32 error_code);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
int kvm_pic_set_irq(void *opaque, int irq, int level);
......@@ -752,8 +755,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}
#define MSR_IA32_TIME_STAMP_COUNTER 0x010
#define TSS_IOPB_BASE_OFFSET 0x66
#define TSS_BASE_SIZE 0x68
#define TSS_IOPB_SIZE (65536 / 8)
......@@ -796,5 +797,8 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
#endif /* _ASM_X86_KVM_HOST_H */
#ifndef _ASM_X86_KVM_PARA_H
#define _ASM_X86_KVM_PARA_H
#include <linux/types.h>
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
* should be used to determine that a VM is running under KVM.
*/
......
......@@ -374,6 +374,7 @@
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117
#endif /* _ASM_X86_MSR_INDEX_H */
......@@ -55,6 +55,7 @@
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
#define PIN_BASED_EXT_INTR_MASK 0x00000001
......@@ -351,9 +352,16 @@ enum vmcs_field {
#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_EPT_EXTENT_CONTEXT 1
#define VMX_EPT_EXTENT_GLOBAL 2
#define VMX_EPT_EXECUTE_ONLY_BIT (1ull)
#define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6)
#define VMX_EPTP_UC_BIT (1ull << 8)
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
#define VMX_EPT_DEFAULT_GAW 3
#define VMX_EPT_MAX_GAW 0x4
#define VMX_EPT_MT_EPTE_SHIFT 3
......
......@@ -34,7 +34,6 @@
struct kvm_para_state {
u8 mmu_queue[MMU_QUEUE_SIZE];
int mmu_queue_len;
enum paravirt_lazy_mode mode;
};
static DEFINE_PER_CPU(struct kvm_para_state, para_state);
......@@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
{
struct kvm_para_state *state = kvm_para_state();
if (state->mode != PARAVIRT_LAZY_MMU) {
if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) {
kvm_mmu_op(buffer, len);
return;
}
......@@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn)
static void kvm_enter_lazy_mmu(void)
{
struct kvm_para_state *state = kvm_para_state();
paravirt_enter_lazy_mmu();
state->mode = paravirt_get_lazy_mode();
}
static void kvm_leave_lazy_mmu(void)
......@@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void)
mmu_queue_flush(state);
paravirt_leave_lazy_mmu();
state->mode = paravirt_get_lazy_mode();
}
static void __init paravirt_ops_setup(void)
......
......@@ -50,8 +50,8 @@ static unsigned long kvm_get_wallclock(void)
struct timespec ts;
int low, high;
low = (int)__pa(&wall_clock);
high = ((u64)__pa(&wall_clock) >> 32);
low = (int)__pa_symbol(&wall_clock);
high = ((u64)__pa_symbol(&wall_clock) >> 32);
native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
vcpu_time = &get_cpu_var(hv_clock);
......
#
# KVM configuration
#
config HAVE_KVM
bool
config HAVE_KVM_IRQCHIP
bool
default y
source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
......@@ -29,6 +25,9 @@ config KVM
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
......@@ -63,18 +62,6 @@ config KVM_AMD
To compile this as a module, choose M here: the module
will be called kvm-amd.
config KVM_TRACE
bool "KVM trace support"
depends on KVM && SYSFS
select MARKERS
select RELAY
select DEBUG_FS
default n
---help---
This option allows reading a trace of kvm-related events through
relayfs. Note the ABI is not considered stable and will be
modified in future updates.
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/lguest/Kconfig
......
#
# Makefile for Kernel-based Virtual Machine module
#
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o)
ifeq ($(CONFIG_KVM_TRACE),y)
common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
endif
ifeq ($(CONFIG_IOMMU_API),y)
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o eventfd.o)
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o timer.o
kvm-intel-y += vmx.o
kvm-amd-y += svm.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
kvm-amd-objs = svm.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o
/******************************************************************************
* x86_emulate.c
* emulate.c
*
* Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
*
......@@ -30,7 +30,9 @@
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_x86_emulate.h>
#include <asm/kvm_emulate.h>
#include "mmu.h" /* for is_long_mode() */
/*
* Opcode effective-address decode tables.
......@@ -60,6 +62,7 @@
#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
#define SrcOne (7<<4) /* Implied '1' */
#define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */
#define SrcImmU (9<<4) /* Immediate operand, unsigned */
#define SrcMask (0xf<<4)
/* Generic ModRM decode. */
#define ModRM (1<<8)
......@@ -97,11 +100,11 @@ static u32 opcode_table[256] = {
/* 0x10 - 0x17 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
/* 0x18 - 0x1F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
/* 0x20 - 0x27 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
......@@ -195,7 +198,7 @@ static u32 opcode_table[256] = {
ByteOp | SrcImmUByte, SrcImmUByte,
/* 0xE8 - 0xEF */
SrcImm | Stack, SrcImm | ImplicitOps,
SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */
......@@ -208,7 +211,7 @@ static u32 opcode_table[256] = {
static u32 twobyte_table[256] = {
/* 0x00 - 0x0F */
0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
/* 0x10 - 0x1F */
0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
......@@ -216,7 +219,9 @@ static u32 twobyte_table[256] = {
ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x30 - 0x3F */
ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
ImplicitOps, 0, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x40 - 0x47 */
DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
......@@ -319,8 +324,11 @@ static u32 group2_table[] = {
};
/* EFLAGS bit definitions. */
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
......@@ -1027,6 +1035,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->src.type = OP_MEM;
break;
case SrcImm:
case SrcImmU:
c->src.type = OP_IMM;
c->src.ptr = (unsigned long *)c->eip;
c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
......@@ -1044,6 +1053,19 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->src.val = insn_fetch(s32, 4, c->eip);
break;
}
if ((c->d & SrcMask) == SrcImmU) {
switch (c->src.bytes) {
case 1:
c->src.val &= 0xff;
break;
case 2:
c->src.val &= 0xffff;
break;
case 4:
c->src.val &= 0xffffffff;
break;
}
}
break;
case SrcImmByte:
case SrcImmUByte:
......@@ -1375,6 +1397,217 @@ static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
ctxt->interruptibility = mask;
}
static inline void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
struct kvm_segment *cs, struct kvm_segment *ss)
{
memset(cs, 0, sizeof(struct kvm_segment));
kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
memset(ss, 0, sizeof(struct kvm_segment));
cs->l = 0; /* will be adjusted later */
cs->base = 0; /* flat segment */
cs->g = 1; /* 4kb granularity */
cs->limit = 0xffffffff; /* 4GB limit */
cs->type = 0x0b; /* Read, Execute, Accessed */
cs->s = 1;
cs->dpl = 0; /* will be adjusted later */
cs->present = 1;
cs->db = 1;
ss->unusable = 0;
ss->base = 0; /* flat segment */
ss->limit = 0xffffffff; /* 4GB limit */
ss->g = 1; /* 4kb granularity */
ss->s = 1;
ss->type = 0x03; /* Read/Write, Accessed */
ss->db = 1; /* 32bit stack segment */
ss->dpl = 0;
ss->present = 1;
}
static int
emulate_syscall(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
struct kvm_segment cs, ss;
u64 msr_data;
/* syscall is not available in real mode */
if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
return -1;
setup_syscalls_segments(ctxt, &cs, &ss);
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
msr_data >>= 32;
cs.selector = (u16)(msr_data & 0xfffc);
ss.selector = (u16)(msr_data + 8);
if (is_long_mode(ctxt->vcpu)) {
cs.db = 0;
cs.l = 1;
}
kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
c->regs[VCPU_REGS_RCX] = c->eip;
if (is_long_mode(ctxt->vcpu)) {
#ifdef CONFIG_X86_64
c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
kvm_x86_ops->get_msr(ctxt->vcpu,
ctxt->mode == X86EMUL_MODE_PROT64 ?
MSR_LSTAR : MSR_CSTAR, &msr_data);
c->eip = msr_data;
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
} else {
/* legacy mode */
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
c->eip = (u32)msr_data;
ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
}
return 0;
}
static int
emulate_sysenter(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
struct kvm_segment cs, ss;
u64 msr_data;
/* inject #UD if LOCK prefix is used */
if (c->lock_prefix)
return -1;
/* inject #GP if in real mode or paging is disabled */
if (ctxt->mode == X86EMUL_MODE_REAL ||
!(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
}
/* XXX sysenter/sysexit have not been tested in 64bit mode.
* Therefore, we inject an #UD.
*/
if (ctxt->mode == X86EMUL_MODE_PROT64)
return -1;
setup_syscalls_segments(ctxt, &cs, &ss);
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
switch (ctxt->mode) {
case X86EMUL_MODE_PROT32:
if ((msr_data & 0xfffc) == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
}
break;
case X86EMUL_MODE_PROT64:
if (msr_data == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
}
break;
}
ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
cs.selector = (u16)msr_data;
cs.selector &= ~SELECTOR_RPL_MASK;
ss.selector = cs.selector + 8;
ss.selector &= ~SELECTOR_RPL_MASK;
if (ctxt->mode == X86EMUL_MODE_PROT64
|| is_long_mode(ctxt->vcpu)) {
cs.db = 0;
cs.l = 1;
}
kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
c->eip = msr_data;
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
c->regs[VCPU_REGS_RSP] = msr_data;
return 0;
}
static int
emulate_sysexit(struct x86_emulate_ctxt *ctxt)
{
struct decode_cache *c = &ctxt->decode;
struct kvm_segment cs, ss;
u64 msr_data;
int usermode;
/* inject #UD if LOCK prefix is used */
if (c->lock_prefix)
return -1;
/* inject #GP if in real mode or paging is disabled */
if (ctxt->mode == X86EMUL_MODE_REAL
|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
}
/* sysexit must be called from CPL 0 */
if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
}
setup_syscalls_segments(ctxt, &cs, &ss);
if ((c->rex_prefix & 0x8) != 0x0)
usermode = X86EMUL_MODE_PROT64;
else
usermode = X86EMUL_MODE_PROT32;
cs.dpl = 3;
ss.dpl = 3;
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
switch (usermode) {
case X86EMUL_MODE_PROT32:
cs.selector = (u16)(msr_data + 16);
if ((msr_data & 0xfffc) == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
}
ss.selector = (u16)(msr_data + 24);
break;
case X86EMUL_MODE_PROT64:
cs.selector = (u16)(msr_data + 32);
if (msr_data == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
}
ss.selector = cs.selector + 8;
cs.db = 0;
cs.l = 1;
break;
}
cs.selector |= SELECTOR_RPL_MASK;
ss.selector |= SELECTOR_RPL_MASK;
kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
return 0;
}
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
......@@ -1970,6 +2203,12 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
goto cannot_emulate;
}
break;
case 0x05: /* syscall */
if (emulate_syscall(ctxt) == -1)
goto cannot_emulate;
else
goto writeback;
break;
case 0x06:
emulate_clts(ctxt->vcpu);
c->dst.type = OP_NONE;
......@@ -2036,6 +2275,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
rc = X86EMUL_CONTINUE;
c->dst.type = OP_NONE;
break;
case 0x34: /* sysenter */
if (emulate_sysenter(ctxt) == -1)
goto cannot_emulate;
else
goto writeback;
break;
case 0x35: /* sysexit */
if (emulate_sysexit(ctxt) == -1)
goto cannot_emulate;
else
goto writeback;
break;
case 0x40 ... 0x4f: /* cmov */
c->dst.val = c->dst.orig_val = c->src.val;
if (!test_cc(c->b, ctxt->eflags))
......
......@@ -231,7 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
{
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
return atomic_read(&pit->pit_state.pit_timer.pending);
return 0;
}
......@@ -252,7 +252,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
struct hrtimer *timer;
if (vcpu->vcpu_id != 0 || !pit)
if (!kvm_vcpu_is_bsp(vcpu) || !pit)
return;
timer = &pit->pit_state.pit_timer.timer;
......@@ -294,7 +294,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
pt->timer.function = kvm_timer_fn;
pt->t_ops = &kpit_ops;
pt->kvm = ps->pit->kvm;
pt->vcpu_id = 0;
pt->vcpu = pt->kvm->bsp_vcpu;
atomic_set(&pt->pending, 0);
ps->irq_ack = 1;
......@@ -332,33 +332,62 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
case 1:
/* FIXME: enhance mode 4 precision */
case 4:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
create_pit_timer(ps, val, 0);
}
break;
case 2:
case 3:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
create_pit_timer(ps, val, 1);
}
break;
default:
destroy_pit_timer(&ps->pit_timer);
}
}
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start)
{
mutex_lock(&kvm->arch.vpit->pit_state.lock);
u8 saved_mode;
if (hpet_legacy_start) {
/* save existing mode for later reenablement */
saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
pit_load_count(kvm, channel, val);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
kvm->arch.vpit->pit_state.channels[0].mode = saved_mode;
} else {
pit_load_count(kvm, channel, val);
}
}
static inline struct kvm_pit *dev_to_pit(struct kvm_io_device *dev)
{
return container_of(dev, struct kvm_pit, dev);
}
static void pit_ioport_write(struct kvm_io_device *this,
static inline struct kvm_pit *speaker_to_pit(struct kvm_io_device *dev)
{
return container_of(dev, struct kvm_pit, speaker_dev);
}
static inline int pit_in_range(gpa_t addr)
{
return ((addr >= KVM_PIT_BASE_ADDRESS) &&
(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
}
static int pit_ioport_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
{
struct kvm_pit *pit = (struct kvm_pit *)this->private;
struct kvm_pit *pit = dev_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
struct kvm *kvm = pit->kvm;
int channel, access;
struct kvm_kpit_channel_state *s;
u32 val = *(u32 *) data;
if (!pit_in_range(addr))
return -EOPNOTSUPP;
val &= 0xff;
addr &= KVM_PIT_CHANNEL_MASK;
......@@ -421,16 +450,19 @@ static void pit_ioport_write(struct kvm_io_device *this,
}
mutex_unlock(&pit_state->lock);
return 0;
}
static void pit_ioport_read(struct kvm_io_device *this,
static int pit_ioport_read(struct kvm_io_device *this,
gpa_t addr, int len, void *data)
{
struct kvm_pit *pit = (struct kvm_pit *)this->private;
struct kvm_pit *pit = dev_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
struct kvm *kvm = pit->kvm;
int ret, count;
struct kvm_kpit_channel_state *s;
if (!pit_in_range(addr))
return -EOPNOTSUPP;
addr &= KVM_PIT_CHANNEL_MASK;
s = &pit_state->channels[addr];
......@@ -485,37 +517,36 @@ static void pit_ioport_read(struct kvm_io_device *this,
memcpy(data, (char *)&ret, len);
mutex_unlock(&pit_state->lock);
return 0;
}
static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
int len, int is_write)
{
return ((addr >= KVM_PIT_BASE_ADDRESS) &&
(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
}
static void speaker_ioport_write(struct kvm_io_device *this,
static int speaker_ioport_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
{
struct kvm_pit *pit = (struct kvm_pit *)this->private;
struct kvm_pit *pit = speaker_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
struct kvm *kvm = pit->kvm;
u32 val = *(u32 *) data;
if (addr != KVM_SPEAKER_BASE_ADDRESS)
return -EOPNOTSUPP;
mutex_lock(&pit_state->lock);
pit_state->speaker_data_on = (val >> 1) & 1;
pit_set_gate(kvm, 2, val & 1);
mutex_unlock(&pit_state->lock);
return 0;
}
static void speaker_ioport_read(struct kvm_io_device *this,
static int speaker_ioport_read(struct kvm_io_device *this,
gpa_t addr, int len, void *data)
{
struct kvm_pit *pit = (struct kvm_pit *)this->private;
struct kvm_pit *pit = speaker_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
struct kvm *kvm = pit->kvm;
unsigned int refresh_clock;
int ret;
if (addr != KVM_SPEAKER_BASE_ADDRESS)
return -EOPNOTSUPP;
/* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
......@@ -527,12 +558,7 @@ static void speaker_ioport_read(struct kvm_io_device *this,
len = sizeof(ret);
memcpy(data, (char *)&ret, len);
mutex_unlock(&pit_state->lock);
}
static int speaker_in_range(struct kvm_io_device *this, gpa_t addr,
int len, int is_write)
{
return (addr == KVM_SPEAKER_BASE_ADDRESS);
return 0;
}
void kvm_pit_reset(struct kvm_pit *pit)
......@@ -541,6 +567,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
struct kvm_kpit_channel_state *c;
mutex_lock(&pit->pit_state.lock);
pit->pit_state.flags = 0;
for (i = 0; i < 3; i++) {
c = &pit->pit_state.channels[i];
c->mode = 0xff;
......@@ -563,10 +590,22 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
}
}
struct kvm_pit *kvm_create_pit(struct kvm *kvm)
static const struct kvm_io_device_ops pit_dev_ops = {
.read = pit_ioport_read,
.write = pit_ioport_write,
};
static const struct kvm_io_device_ops speaker_dev_ops = {
.read = speaker_ioport_read,
.write = speaker_ioport_write,
};
/* Caller must have writers lock on slots_lock */
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
{
struct kvm_pit *pit;
struct kvm_kpit_state *pit_state;
int ret;
pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
if (!pit)
......@@ -582,19 +621,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
mutex_lock(&pit->pit_state.lock);
spin_lock_init(&pit->pit_state.inject_lock);
/* Initialize PIO device */
pit->dev.read = pit_ioport_read;
pit->dev.write = pit_ioport_write;
pit->dev.in_range = pit_in_range;
pit->dev.private = pit;
kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
pit->speaker_dev.read = speaker_ioport_read;
pit->speaker_dev.write = speaker_ioport_write;
pit->speaker_dev.in_range = speaker_in_range;
pit->speaker_dev.private = pit;
kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
kvm->arch.vpit = pit;
pit->kvm = kvm;
......@@ -613,7 +639,30 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
pit->mask_notifier.func = pit_mask_notifer;
kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
if (ret < 0)
goto fail;
if (flags & KVM_PIT_SPEAKER_DUMMY) {
kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
&pit->speaker_dev);
if (ret < 0)
goto fail_unregister;
}
return pit;
fail_unregister:
__kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
fail:
if (pit->irq_source_id >= 0)
kvm_free_irq_source_id(kvm, pit->irq_source_id);
kfree(pit);
return NULL;
}
void kvm_free_pit(struct kvm *kvm)
......@@ -623,6 +672,8 @@ void kvm_free_pit(struct kvm *kvm)
if (kvm->arch.vpit) {
kvm_unregister_irq_mask_notifier(kvm, 0,
&kvm->arch.vpit->mask_notifier);
kvm_unregister_irq_ack_notifier(kvm,
&kvm->arch.vpit->pit_state.irq_ack_notifier);
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
......@@ -637,10 +688,10 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i;
mutex_lock(&kvm->lock);
mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
mutex_unlock(&kvm->lock);
mutex_unlock(&kvm->irq_lock);
/*
* Provides NMI watchdog support via Virtual Wire mode.
......@@ -652,11 +703,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
* VCPU0, and only if its LVT0 is in EXTINT mode.
*/
if (kvm->arch.vapics_in_nmi_mode > 0)
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (vcpu)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_apic_nmi_wd_deliver(vcpu);
}
}
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
......@@ -665,7 +713,7 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu->kvm;
struct kvm_kpit_state *ps;
if (vcpu && pit) {
if (pit) {
int inject = 0;
ps = &pit->pit_state;
......
......@@ -21,6 +21,7 @@ struct kvm_kpit_channel_state {
struct kvm_kpit_state {
struct kvm_kpit_channel_state channels[3];
u32 flags;
struct kvm_timer pit_timer;
bool is_periodic;
u32 speaker_data_on;
......@@ -49,8 +50,8 @@ struct kvm_pit {
#define KVM_PIT_CHANNEL_MASK 0x3
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
struct kvm_pit *kvm_create_pit(struct kvm *kvm);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
void kvm_free_pit(struct kvm *kvm);
void kvm_pit_reset(struct kvm_pit *pit);
......
......@@ -30,50 +30,24 @@
#include "irq.h"
#include <linux/kvm_host.h>
static void pic_lock(struct kvm_pic *s)
__acquires(&s->lock)
{
spin_lock(&s->lock);
}
static void pic_unlock(struct kvm_pic *s)
__releases(&s->lock)
{
struct kvm *kvm = s->kvm;
unsigned acks = s->pending_acks;
bool wakeup = s->wakeup_needed;
struct kvm_vcpu *vcpu;
s->pending_acks = 0;
s->wakeup_needed = false;
spin_unlock(&s->lock);
while (acks) {
kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
__ffs(acks));
acks &= acks - 1;
}
if (wakeup) {
vcpu = s->kvm->vcpus[0];
if (vcpu)
kvm_vcpu_kick(vcpu);
}
}
#include "trace.h"
static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
{
s->isr &= ~(1 << irq);
s->isr_ack |= (1 << irq);
if (s != &s->pics_state->pics[0])
irq += 8;
kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
}
void kvm_pic_clear_isr_ack(struct kvm *kvm)
{
struct kvm_pic *s = pic_irqchip(kvm);
spin_lock(&s->lock);
s->pics[0].isr_ack = 0xff;
s->pics[1].isr_ack = 0xff;
spin_unlock(&s->lock);
}
/*
......@@ -174,9 +148,9 @@ static void pic_update_irq(struct kvm_pic *s)
void kvm_pic_update_irq(struct kvm_pic *s)
{
pic_lock(s);
spin_lock(&s->lock);
pic_update_irq(s);
pic_unlock(s);
spin_unlock(&s->lock);
}
int kvm_pic_set_irq(void *opaque, int irq, int level)
......@@ -184,12 +158,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
struct kvm_pic *s = opaque;
int ret = -1;
pic_lock(s);
spin_lock(&s->lock);
if (irq >= 0 && irq < PIC_NUM_PINS) {
ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
s->pics[irq >> 3].imr, ret == 0);
}
pic_unlock(s);
spin_unlock(&s->lock);
return ret;
}
......@@ -217,7 +193,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
int irq, irq2, intno;
struct kvm_pic *s = pic_irqchip(kvm);
pic_lock(s);
spin_lock(&s->lock);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
pic_intack(&s->pics[0], irq);
......@@ -242,8 +218,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
intno = s->pics[0].irq_base + irq;
}
pic_update_irq(s);
pic_unlock(s);
kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
spin_unlock(&s->lock);
return intno;
}
......@@ -252,7 +227,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
{
int irq, irqbase, n;
struct kvm *kvm = s->pics_state->irq_request_opaque;
struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
if (s == &s->pics_state->pics[0])
irqbase = 0;
......@@ -263,7 +238,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
n = irq + irqbase;
s->pics_state->pending_acks |= 1 << n;
kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
}
}
s->last_irr = 0;
......@@ -428,8 +403,7 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
return s->elcr;
}
static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
int len, int is_write)
static int picdev_in_range(gpa_t addr)
{
switch (addr) {
case 0x20:
......@@ -444,18 +418,25 @@ static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
}
}
static void picdev_write(struct kvm_io_device *this,
static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
{
return container_of(dev, struct kvm_pic, dev);
}
static int picdev_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *val)
{
struct kvm_pic *s = this->private;
struct kvm_pic *s = to_pic(this);
unsigned char data = *(unsigned char *)val;
if (!picdev_in_range(addr))
return -EOPNOTSUPP;
if (len != 1) {
if (printk_ratelimit())
printk(KERN_ERR "PIC: non byte write\n");
return;
return 0;
}
pic_lock(s);
spin_lock(&s->lock);
switch (addr) {
case 0x20:
case 0x21:
......@@ -468,21 +449,24 @@ static void picdev_write(struct kvm_io_device *this,
elcr_ioport_write(&s->pics[addr & 1], addr, data);
break;
}
pic_unlock(s);
spin_unlock(&s->lock);
return 0;
}
static void picdev_read(struct kvm_io_device *this,
static int picdev_read(struct kvm_io_device *this,
gpa_t addr, int len, void *val)
{
struct kvm_pic *s = this->private;
struct kvm_pic *s = to_pic(this);
unsigned char data = 0;
if (!picdev_in_range(addr))
return -EOPNOTSUPP;
if (len != 1) {
if (printk_ratelimit())
printk(KERN_ERR "PIC: non byte read\n");
return;
return 0;
}
pic_lock(s);
spin_lock(&s->lock);
switch (addr) {
case 0x20:
case 0x21:
......@@ -496,7 +480,8 @@ static void picdev_read(struct kvm_io_device *this,
break;
}
*(unsigned char *)val = data;
pic_unlock(s);
spin_unlock(&s->lock);
return 0;
}
/*
......@@ -505,20 +490,27 @@ static void picdev_read(struct kvm_io_device *this,
static void pic_irq_request(void *opaque, int level)
{
struct kvm *kvm = opaque;
struct kvm_vcpu *vcpu = kvm->vcpus[0];
struct kvm_vcpu *vcpu = kvm->bsp_vcpu;
struct kvm_pic *s = pic_irqchip(kvm);
int irq = pic_get_irq(&s->pics[0]);
s->output = level;
if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
s->pics[0].isr_ack &= ~(1 << irq);
s->wakeup_needed = true;
kvm_vcpu_kick(vcpu);
}
}
static const struct kvm_io_device_ops picdev_ops = {
.read = picdev_read,
.write = picdev_write,
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm)
{
struct kvm_pic *s;
int ret;
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
if (!s)
return NULL;
......@@ -534,10 +526,12 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
/*
* Initialize PIO device
*/
s->dev.read = picdev_read;
s->dev.write = picdev_write;
s->dev.in_range = picdev_in_range;
s->dev.private = s;
kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
kvm_iodevice_init(&s->dev, &picdev_ops);
ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
if (ret < 0) {
kfree(s);
return NULL;
}
return s;
}
......@@ -63,7 +63,6 @@ struct kvm_kpic_state {
struct kvm_pic {
spinlock_t lock;
bool wakeup_needed;
unsigned pending_acks;
struct kvm *kvm;
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
......
......@@ -29,4 +29,13 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}
static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
{
if (!test_bit(VCPU_EXREG_PDPTR,
(unsigned long *)&vcpu->arch.regs_avail))
kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
return vcpu->arch.pdptrs[index];
}
#endif
#ifndef __KVM_SVM_H
#define __KVM_SVM_H
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/kvm_host.h>
#include <asm/msr.h>
#include <asm/svm.h>
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
MSR_FS_BASE,
#endif
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
};
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
struct kvm_vcpu;
struct vcpu_svm {
struct kvm_vcpu vcpu;
struct vmcb *vmcb;
unsigned long vmcb_pa;
struct svm_cpu_data *svm_data;
uint64_t asid_generation;
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
u64 host_gs_base;
unsigned long host_cr2;
u32 *msrpm;
struct vmcb *hsave;
u64 hsave_msr;
u64 nested_vmcb;
/* These are the merged vectors */
u32 *nested_msrpm;
/* gpa pointers to the real vectors */
u64 nested_vmcb_msrpm;
};
#endif
......@@ -6,7 +6,7 @@ struct kvm_timer {
bool reinject;
struct kvm_timer_ops *t_ops;
struct kvm *kvm;
int vcpu_id;
struct kvm_vcpu *vcpu;
};
struct kvm_timer_ops {
......
This diff is collapsed.
......@@ -12,6 +12,7 @@ struct kvm_lapic {
struct kvm_timer lapic_timer;
u32 divide_count;
struct kvm_vcpu *vcpu;
bool irr_pending;
struct page *regs_page;
void *regs;
gpa_t vapic_addr;
......@@ -28,6 +29,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
......@@ -44,4 +46,6 @@ void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
#endif
This diff is collapsed.
......@@ -37,6 +37,8 @@
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
......@@ -75,7 +77,7 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
return vcpu->arch.cr0 & X86_CR0_PG;
}
static inline int is_present_pte(unsigned long pte)
static inline int is_present_gpte(unsigned long pte)
{
return pte & PT_PRESENT_MASK;
}
......
#if !defined(_TRACE_KVMMMU_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVMMMU_H
#include <linux/tracepoint.h>
#include <linux/ftrace_event.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvmmmu
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE mmutrace
#define KVM_MMU_PAGE_FIELDS \
__field(__u64, gfn) \
__field(__u32, role) \
__field(__u32, root_count) \
__field(__u32, unsync)
#define KVM_MMU_PAGE_ASSIGN(sp) \
__entry->gfn = sp->gfn; \
__entry->role = sp->role.word; \
__entry->root_count = sp->root_count; \
__entry->unsync = sp->unsync;
#define KVM_MMU_PAGE_PRINTK() ({ \
const char *ret = p->buffer + p->len; \
static const char *access_str[] = { \
"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \
}; \
union kvm_mmu_page_role role; \
\
role.word = __entry->role; \
\
trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \
" %snxe root %u %s%c", \
__entry->gfn, role.level, role.glevels, \
role.quadrant, \
role.direct ? " direct" : "", \
access_str[role.access], \
role.invalid ? " invalid" : "", \
role.cr4_pge ? "" : "!", \
role.nxe ? "" : "!", \
__entry->root_count, \
__entry->unsync ? "unsync" : "sync", 0); \
ret; \
})
#define kvm_mmu_trace_pferr_flags \
{ PFERR_PRESENT_MASK, "P" }, \
{ PFERR_WRITE_MASK, "W" }, \
{ PFERR_USER_MASK, "U" }, \
{ PFERR_RSVD_MASK, "RSVD" }, \
{ PFERR_FETCH_MASK, "F" }
/*
* A pagetable walk has started
*/
TRACE_EVENT(
kvm_mmu_pagetable_walk,
TP_PROTO(u64 addr, int write_fault, int user_fault, int fetch_fault),
TP_ARGS(addr, write_fault, user_fault, fetch_fault),
TP_STRUCT__entry(
__field(__u64, addr)
__field(__u32, pferr)
),
TP_fast_assign(
__entry->addr = addr;
__entry->pferr = (!!write_fault << 1) | (!!user_fault << 2)
| (!!fetch_fault << 4);
),
TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr,
__print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
);
/* We just walked a paging element */
TRACE_EVENT(
kvm_mmu_paging_element,
TP_PROTO(u64 pte, int level),
TP_ARGS(pte, level),
TP_STRUCT__entry(
__field(__u64, pte)
__field(__u32, level)
),
TP_fast_assign(
__entry->pte = pte;
__entry->level = level;
),
TP_printk("pte %llx level %u", __entry->pte, __entry->level)
);
/* We set a pte accessed bit */
TRACE_EVENT(
kvm_mmu_set_accessed_bit,
TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
TP_ARGS(table_gfn, index, size),
TP_STRUCT__entry(
__field(__u64, gpa)
),
TP_fast_assign(
__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
+ index * size;
),
TP_printk("gpa %llx", __entry->gpa)
);
/* We set a pte dirty bit */
TRACE_EVENT(
kvm_mmu_set_dirty_bit,
TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
TP_ARGS(table_gfn, index, size),
TP_STRUCT__entry(
__field(__u64, gpa)
),
TP_fast_assign(
__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
+ index * size;
),
TP_printk("gpa %llx", __entry->gpa)
);
TRACE_EVENT(
kvm_mmu_walker_error,
TP_PROTO(u32 pferr),
TP_ARGS(pferr),
TP_STRUCT__entry(
__field(__u32, pferr)
),
TP_fast_assign(
__entry->pferr = pferr;
),
TP_printk("pferr %x %s", __entry->pferr,
__print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
);
TRACE_EVENT(
kvm_mmu_get_page,
TP_PROTO(struct kvm_mmu_page *sp, bool created),
TP_ARGS(sp, created),
TP_STRUCT__entry(
KVM_MMU_PAGE_FIELDS
__field(bool, created)
),
TP_fast_assign(
KVM_MMU_PAGE_ASSIGN(sp)
__entry->created = created;
),
TP_printk("%s %s", KVM_MMU_PAGE_PRINTK(),
__entry->created ? "new" : "existing")
);
TRACE_EVENT(
kvm_mmu_sync_page,
TP_PROTO(struct kvm_mmu_page *sp),
TP_ARGS(sp),
TP_STRUCT__entry(
KVM_MMU_PAGE_FIELDS
),
TP_fast_assign(
KVM_MMU_PAGE_ASSIGN(sp)
),
TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);
TRACE_EVENT(
kvm_mmu_unsync_page,
TP_PROTO(struct kvm_mmu_page *sp),
TP_ARGS(sp),
TP_STRUCT__entry(
KVM_MMU_PAGE_FIELDS
),
TP_fast_assign(
KVM_MMU_PAGE_ASSIGN(sp)
),
TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);
TRACE_EVENT(
kvm_mmu_zap_page,
TP_PROTO(struct kvm_mmu_page *sp),
TP_ARGS(sp),
TP_STRUCT__entry(
KVM_MMU_PAGE_FIELDS
),
TP_fast_assign(
KVM_MMU_PAGE_ASSIGN(sp)
),
TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);
#endif /* _TRACE_KVMMMU_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
This diff is collapsed.
This diff is collapsed.
......@@ -9,12 +9,16 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
int restart_timer = 0;
wait_queue_head_t *q = &vcpu->wq;
/*
* There is a race window between reading and incrementing, but we do
* not care about potentially loosing timer events in the !reinject
* case anyway.
*/
if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
atomic_inc(&ktimer->pending);
/* FIXME: this code should not know anything about vcpus */
if (!atomic_inc_and_test(&ktimer->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
if (!ktimer->reinject)
atomic_set(&ktimer->pending, 1);
}
if (waitqueue_active(q))
wake_up_interruptible(q);
......@@ -33,7 +37,7 @@ enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
struct kvm_vcpu *vcpu;
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];
vcpu = ktimer->vcpu;
if (!vcpu)
return HRTIMER_NORESTART;
......
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H
#include <linux/tracepoint.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
#define TRACE_INCLUDE_PATH arch/x86/kvm
#define TRACE_INCLUDE_FILE trace
/*
* Tracepoint for guest mode entry.
*/
TRACE_EVENT(kvm_entry,
TP_PROTO(unsigned int vcpu_id),
TP_ARGS(vcpu_id),
TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
),
TP_printk("vcpu %u", __entry->vcpu_id)
);
/*
* Tracepoint for hypercall.
*/
TRACE_EVENT(kvm_hypercall,
TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
unsigned long a2, unsigned long a3),
TP_ARGS(nr, a0, a1, a2, a3),
TP_STRUCT__entry(
__field( unsigned long, nr )
__field( unsigned long, a0 )
__field( unsigned long, a1 )
__field( unsigned long, a2 )
__field( unsigned long, a3 )
),
TP_fast_assign(
__entry->nr = nr;
__entry->a0 = a0;
__entry->a1 = a1;
__entry->a2 = a2;
__entry->a3 = a3;
),
TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx",
__entry->nr, __entry->a0, __entry->a1, __entry->a2,
__entry->a3)
);
/*
* Tracepoint for PIO.
*/
TRACE_EVENT(kvm_pio,
TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
unsigned int count),
TP_ARGS(rw, port, size, count),
TP_STRUCT__entry(
__field( unsigned int, rw )
__field( unsigned int, port )
__field( unsigned int, size )
__field( unsigned int, count )
),
TP_fast_assign(
__entry->rw = rw;
__entry->port = port;
__entry->size = size;
__entry->count = count;
),
TP_printk("pio_%s at 0x%x size %d count %d",
__entry->rw ? "write" : "read",
__entry->port, __entry->size, __entry->count)
);
/*
* Tracepoint for cpuid.
*/
TRACE_EVENT(kvm_cpuid,
TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
unsigned long rcx, unsigned long rdx),
TP_ARGS(function, rax, rbx, rcx, rdx),
TP_STRUCT__entry(
__field( unsigned int, function )
__field( unsigned long, rax )
__field( unsigned long, rbx )
__field( unsigned long, rcx )
__field( unsigned long, rdx )
),
TP_fast_assign(
__entry->function = function;
__entry->rax = rax;
__entry->rbx = rbx;
__entry->rcx = rcx;
__entry->rdx = rdx;
),
TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
__entry->function, __entry->rax,
__entry->rbx, __entry->rcx, __entry->rdx)
);
#define AREG(x) { APIC_##x, "APIC_" #x }
#define kvm_trace_symbol_apic \
AREG(ID), AREG(LVR), AREG(TASKPRI), AREG(ARBPRI), AREG(PROCPRI), \
AREG(EOI), AREG(RRR), AREG(LDR), AREG(DFR), AREG(SPIV), AREG(ISR), \
AREG(TMR), AREG(IRR), AREG(ESR), AREG(ICR), AREG(ICR2), AREG(LVTT), \
AREG(LVTTHMR), AREG(LVTPC), AREG(LVT0), AREG(LVT1), AREG(LVTERR), \
AREG(TMICT), AREG(TMCCT), AREG(TDCR), AREG(SELF_IPI), AREG(EFEAT), \
AREG(ECTRL)
/*
* Tracepoint for apic access.
*/
TRACE_EVENT(kvm_apic,
TP_PROTO(unsigned int rw, unsigned int reg, unsigned int val),
TP_ARGS(rw, reg, val),
TP_STRUCT__entry(
__field( unsigned int, rw )
__field( unsigned int, reg )
__field( unsigned int, val )
),
TP_fast_assign(
__entry->rw = rw;
__entry->reg = reg;
__entry->val = val;
),
TP_printk("apic_%s %s = 0x%x",
__entry->rw ? "write" : "read",
__print_symbolic(__entry->reg, kvm_trace_symbol_apic),
__entry->val)
);
#define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val)
#define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val)
/*
* Tracepoint for kvm guest exit:
*/
TRACE_EVENT(kvm_exit,
TP_PROTO(unsigned int exit_reason, unsigned long guest_rip),
TP_ARGS(exit_reason, guest_rip),
TP_STRUCT__entry(
__field( unsigned int, exit_reason )
__field( unsigned long, guest_rip )
),
TP_fast_assign(
__entry->exit_reason = exit_reason;
__entry->guest_rip = guest_rip;
),
TP_printk("reason %s rip 0x%lx",
ftrace_print_symbols_seq(p, __entry->exit_reason,
kvm_x86_ops->exit_reasons_str),
__entry->guest_rip)
);
/*
* Tracepoint for kvm interrupt injection:
*/
TRACE_EVENT(kvm_inj_virq,
TP_PROTO(unsigned int irq),
TP_ARGS(irq),
TP_STRUCT__entry(
__field( unsigned int, irq )
),
TP_fast_assign(
__entry->irq = irq;
),
TP_printk("irq %u", __entry->irq)
);
/*
* Tracepoint for page fault.
*/
TRACE_EVENT(kvm_page_fault,
TP_PROTO(unsigned long fault_address, unsigned int error_code),
TP_ARGS(fault_address, error_code),
TP_STRUCT__entry(
__field( unsigned long, fault_address )
__field( unsigned int, error_code )
),
TP_fast_assign(
__entry->fault_address = fault_address;
__entry->error_code = error_code;
),
TP_printk("address %lx error_code %x",
__entry->fault_address, __entry->error_code)
);
/*
* Tracepoint for guest MSR access.
*/
TRACE_EVENT(kvm_msr,
TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data),
TP_ARGS(rw, ecx, data),
TP_STRUCT__entry(
__field( unsigned int, rw )
__field( unsigned int, ecx )
__field( unsigned long, data )
),
TP_fast_assign(
__entry->rw = rw;
__entry->ecx = ecx;
__entry->data = data;
),
TP_printk("msr_%s %x = 0x%lx",
__entry->rw ? "write" : "read",
__entry->ecx, __entry->data)
);
#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data)
#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data)
/*
* Tracepoint for guest CR access.
*/
TRACE_EVENT(kvm_cr,
TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val),
TP_ARGS(rw, cr, val),
TP_STRUCT__entry(
__field( unsigned int, rw )
__field( unsigned int, cr )
__field( unsigned long, val )
),
TP_fast_assign(
__entry->rw = rw;
__entry->cr = cr;
__entry->val = val;
),
TP_printk("cr_%s %x = 0x%lx",
__entry->rw ? "write" : "read",
__entry->cr, __entry->val)
);
#define trace_kvm_cr_read(cr, val) trace_kvm_cr(0, cr, val)
#define trace_kvm_cr_write(cr, val) trace_kvm_cr(1, cr, val)
TRACE_EVENT(kvm_pic_set_irq,
TP_PROTO(__u8 chip, __u8 pin, __u8 elcr, __u8 imr, bool coalesced),
TP_ARGS(chip, pin, elcr, imr, coalesced),
TP_STRUCT__entry(
__field( __u8, chip )
__field( __u8, pin )
__field( __u8, elcr )
__field( __u8, imr )
__field( bool, coalesced )
),
TP_fast_assign(
__entry->chip = chip;
__entry->pin = pin;
__entry->elcr = elcr;
__entry->imr = imr;
__entry->coalesced = coalesced;
),
TP_printk("chip %u pin %u (%s%s)%s",
__entry->chip, __entry->pin,
(__entry->elcr & (1 << __entry->pin)) ? "level":"edge",
(__entry->imr & (1 << __entry->pin)) ? "|masked":"",
__entry->coalesced ? " (coalesced)" : "")
);
#define kvm_apic_dst_shorthand \
{0x0, "dst"}, \
{0x1, "self"}, \
{0x2, "all"}, \
{0x3, "all-but-self"}
TRACE_EVENT(kvm_apic_ipi,
TP_PROTO(__u32 icr_low, __u32 dest_id),
TP_ARGS(icr_low, dest_id),
TP_STRUCT__entry(
__field( __u32, icr_low )
__field( __u32, dest_id )
),
TP_fast_assign(
__entry->icr_low = icr_low;
__entry->dest_id = dest_id;
),
TP_printk("dst %x vec %u (%s|%s|%s|%s|%s)",
__entry->dest_id, (u8)__entry->icr_low,
__print_symbolic((__entry->icr_low >> 8 & 0x7),
kvm_deliver_mode),
(__entry->icr_low & (1<<11)) ? "logical" : "physical",
(__entry->icr_low & (1<<14)) ? "assert" : "de-assert",
(__entry->icr_low & (1<<15)) ? "level" : "edge",
__print_symbolic((__entry->icr_low >> 18 & 0x3),
kvm_apic_dst_shorthand))
);
TRACE_EVENT(kvm_apic_accept_irq,
TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced),
TP_ARGS(apicid, dm, tm, vec, coalesced),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
__field( __u8, tm )
__field( __u8, vec )
__field( bool, coalesced )
),
TP_fast_assign(
__entry->apicid = apicid;
__entry->dm = dm;
__entry->tm = tm;
__entry->vec = vec;
__entry->coalesced = coalesced;
),
TP_printk("apicid %x vec %u (%s|%s)%s",
__entry->apicid, __entry->vec,
__print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
__entry->tm ? "level" : "edge",
__entry->coalesced ? " (coalesced)" : "")
);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
This diff is collapsed.
This diff is collapsed.
......@@ -31,4 +31,8 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
{
return (nr == BP_VECTOR) || (nr == OF_VECTOR);
}
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
#endif
......@@ -104,6 +104,7 @@ EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
EXPORT_SYMBOL(kunmap_atomic);
EXPORT_SYMBOL(kmap_atomic_prot);
EXPORT_SYMBOL(kmap_atomic_to_page);
void __init set_highmem_pages_init(void)
{
......
......@@ -3,6 +3,11 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \
header-y += kvm.h
endif
ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
$(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
header-y += kvm_para.h
endif
ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h \
$(srctree)/include/asm-$(SRCARCH)/a.out.h),)
unifdef-y += a.out.h
......
......@@ -268,6 +268,10 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \
$(srctree)/include/asm-$(SRCARCH)/kvm.h),)
unifdef-y += kvm.h
endif
ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
$(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
unifdef-y += kvm_para.h
endif
unifdef-y += llc.h
unifdef-y += loop.h
unifdef-y += lp.h
......
This diff is collapsed.
This diff is collapsed.
......@@ -13,6 +13,7 @@
#define KVM_ENOSYS 1000
#define KVM_EFAULT EFAULT
#define KVM_E2BIG E2BIG
#define KVM_EPERM EPERM
#define KVM_HC_VAPIC_POLL_IRQ 1
#define KVM_HC_MMU_OP 2
......
This diff is collapsed.
......@@ -234,6 +234,7 @@ unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
return 1UL << (hstate->order + PAGE_SHIFT);
}
EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
/*
* Return the page size being used by the MMU to back a VMA. In the majority
......
# KVM common configuration items and defaults
config HAVE_KVM
bool
config HAVE_KVM_IRQCHIP
bool
config HAVE_KVM_EVENTFD
bool
select EVENTFD
config KVM_APIC_ARCHITECTURE
bool
This diff is collapsed.
......@@ -12,6 +12,7 @@
struct kvm_coalesced_mmio_dev {
struct kvm_io_device dev;
struct kvm *kvm;
spinlock_t lock;
int nb_zones;
struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
};
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment