Commit 1bc87b00 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (75 commits)
  KVM: SVM: Keep intercepting task switching with NPT enabled
  KVM: s390: implement sigp external call
  KVM: s390: fix register setting
  KVM: s390: fix return value of kvm_arch_init_vm
  KVM: s390: check cpu_id prior to using it
  KVM: emulate lapic tsc deadline timer for guest
  x86: TSC deadline definitions
  KVM: Fix simultaneous NMIs
  KVM: x86 emulator: convert push %sreg/pop %sreg to direct decode
  KVM: x86 emulator: switch lds/les/lss/lfs/lgs to direct decode
  KVM: x86 emulator: streamline decode of segment registers
  KVM: x86 emulator: simplify OpMem64 decode
  KVM: x86 emulator: switch src decode to decode_operand()
  KVM: x86 emulator: qualify OpReg inhibit_byte_regs hack
  KVM: x86 emulator: switch OpImmUByte decode to decode_imm()
  KVM: x86 emulator: free up some flag bits near src, dst
  KVM: x86 emulator: switch src2 to generic decode_operand()
  KVM: x86 emulator: expand decode flags to 64 bits
  KVM: x86 emulator: split dst decode to a generic decode_operand()
  KVM: x86 emulator: move memop, memopp into emulation context
  ...
parents acff987d f1c1da2b
......@@ -1201,6 +1201,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
[KVM,Intel] Disable FlexPriority feature (TPR shadow).
Default is 1 (enabled)
kvm-intel.nested=
[KVM,Intel] Enable VMX nesting (nVMX).
Default is 0 (disabled)
kvm-intel.unrestricted_guest=
[KVM,Intel] Disable unrestricted guest feature
(virtualized real and unpaged mode) on capable
......
......@@ -175,10 +175,30 @@ Parameters: vcpu id (apic id on x86)
Returns: vcpu fd on success, -1 on error
This API adds a vcpu to a virtual machine. The vcpu id is a small integer
in the range [0, max_vcpus). You can use KVM_CAP_NR_VCPUS of the
KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time.
in the range [0, max_vcpus).
The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of
the KVM_CHECK_EXTENSION ioctl() at run-time.
The maximum possible value for max_vcpus can be retrieved using the
KVM_CAP_MAX_VCPUS of the KVM_CHECK_EXTENSION ioctl() at run-time.
If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
cpus max.
If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
same as the value returned from KVM_CAP_NR_VCPUS.
On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
threads in one or more virtual CPU cores. (This is because the
hardware requires all the hardware threads in a CPU core to be in the
same partition.) The KVM_CAP_PPC_SMT capability indicates the number
of vcpus per virtual core (vcore). The vcore id is obtained by
dividing the vcpu id by the number of vcpus per vcore. The vcpus in a
given vcore will always be in the same physical core as each other
(though that might be a different physical core from time to time).
Userspace can control the threading (SMT) mode of the guest by its
allocation of vcpu ids. For example, if userspace wants
single-threaded guest vcpus, it should make all vcpu ids be a multiple
of the number of vcpus per vcore.
On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
threads in one or more virtual CPU cores. (This is because the
......@@ -1633,3 +1653,50 @@ developer registration required to access it).
char padding[256];
};
};
6. Capabilities that can be enabled
There are certain capabilities that change the behavior of the virtual CPU when
enabled. To enable them, please see section 4.37. Below you can find a list of
capabilities and what their effect on the vCPU is when enabling them.
The following information is provided along with the description:
Architectures: which instruction set architectures provide this ioctl.
x86 includes both i386 and x86_64.
Parameters: what parameters are accepted by the capability.
Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
are not detailed, but errors with specific meanings are.
6.1 KVM_CAP_PPC_OSI
Architectures: ppc
Parameters: none
Returns: 0 on success; -1 on error
This capability enables interception of OSI hypercalls that otherwise would
be treated as normal system calls to be injected into the guest. OSI hypercalls
were invented by Mac-on-Linux to have a standardized communication mechanism
between the guest and the host.
When this capability is enabled, KVM_EXIT_OSI can occur.
6.2 KVM_CAP_PPC_PAPR
Architectures: ppc
Parameters: none
Returns: 0 on success; -1 on error
This capability enables interception of PAPR hypercalls. PAPR hypercalls are
done using the hypercall instruction "sc 1".
It also sets the guest privilege level to "supervisor" mode. Usually the guest
runs in "hypervisor" privilege mode with a few missing features.
In addition to the above, it changes the semantics of SDR1. In this mode, the
HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the
HTAB invisible to the guest.
When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
......@@ -148,6 +148,12 @@ struct kvm_regs {
#define KVM_SREGS_E_UPDATE_DEC (1 << 2)
#define KVM_SREGS_E_UPDATE_DBSR (1 << 3)
/*
* Book3S special bits to indicate contents in the struct by maintaining
* backwards compatibility with older structs. If adding a new field,
* please make sure to add a flag for that new field */
#define KVM_SREGS_S_HIOR (1 << 0)
/*
* In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
* previous KVM_GET_REGS.
......@@ -173,6 +179,8 @@ struct kvm_sregs {
__u64 ibat[8];
__u64 dbat[8];
} ppc32;
__u64 flags; /* KVM_SREGS_S_ */
__u64 hior;
} s;
struct {
union {
......@@ -276,6 +284,11 @@ struct kvm_guest_debug_arch {
#define KVM_INTERRUPT_UNSET -2U
#define KVM_INTERRUPT_SET_LEVEL -3U
#define KVM_CPU_440 1
#define KVM_CPU_E500V2 2
#define KVM_CPU_3S_32 3
#define KVM_CPU_3S_64 4
/* for KVM_CAP_SPAPR_TCE */
struct kvm_create_spapr_tce {
__u64 liobn;
......
......@@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s {
#endif
int context_id[SID_CONTEXTS];
bool hior_sregs; /* HIOR is set by SREGS, not PVR */
struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
......@@ -139,15 +141,14 @@ extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
extern void kvmppc_handler_lowmem_trampoline(void);
extern void kvmppc_handler_trampoline_enter(void);
extern void kvmppc_rmcall(ulong srr0, ulong srr1);
extern void kvmppc_entry_trampoline(void);
extern void kvmppc_hv_entry_trampoline(void);
extern void kvmppc_load_up_fpu(void);
extern void kvmppc_load_up_altivec(void);
extern void kvmppc_load_up_vsx(void);
extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
{
......@@ -382,6 +383,39 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
}
#endif
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index)
{
unsigned long rb, va_low;
rb = (v & ~0x7fUL) << 16; /* AVA field */
va_low = pte_index >> 3;
if (v & HPTE_V_SECONDARY)
va_low = ~va_low;
/* xor vsid from AVA */
if (!(v & HPTE_V_1TB_SEG))
va_low ^= v >> 12;
else
va_low ^= v >> 24;
va_low &= 0x7ff;
if (v & HPTE_V_LARGE) {
rb |= 1; /* L field */
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
(r & 0xff000)) {
/* non-16MB large page, must be 64k */
/* (masks depend on page size) */
rb |= 0x1000; /* page encoding in LP field */
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
}
} else {
/* 4kB page */
rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
}
rb |= (v >> 54) & 0x300; /* B field */
return rb;
}
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
* instruction for the OSI hypercalls */
#define OSI_SC_MAGIC_R3 0x113724FA
......
......@@ -75,6 +75,8 @@ struct kvmppc_host_state {
ulong scratch0;
ulong scratch1;
u8 in_guest;
u8 restore_hid5;
u8 napping;
#ifdef CONFIG_KVM_BOOK3S_64_HV
struct kvm_vcpu *kvm_vcpu;
......
......@@ -198,21 +198,29 @@ struct kvm_arch {
*/
struct kvmppc_vcore {
int n_runnable;
int n_blocked;
int n_busy;
int num_threads;
int entry_exit_count;
int n_woken;
int nap_count;
int napping_threads;
u16 pcpu;
u8 vcore_running;
u8 vcore_state;
u8 in_guest;
struct list_head runnable_threads;
spinlock_t lock;
wait_queue_head_t wq;
};
#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
#define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8)
/* Values for vcore_state */
#define VCORE_INACTIVE 0
#define VCORE_RUNNING 1
#define VCORE_EXITING 2
#define VCORE_SLEEPING 3
struct kvmppc_pte {
ulong eaddr;
u64 vpage;
......@@ -258,14 +266,6 @@ struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
#ifdef CONFIG_PPC_BOOK3S
ulong host_msr;
ulong host_r2;
void *host_retip;
ulong trampoline_lowmem;
ulong trampoline_enter;
ulong highmem_handler;
ulong rmcall;
ulong host_paca_phys;
struct kvmppc_slb slb[64];
int slb_max; /* 1 + index of last valid entry in slb[] */
int slb_nr; /* total number of entries in SLB */
......@@ -389,6 +389,9 @@ struct kvm_vcpu_arch {
u8 dcr_is_write;
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
u8 sane;
u8 cpu_type;
u8 hcall_needed;
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
......@@ -408,11 +411,13 @@ struct kvm_vcpu_arch {
struct dtl *dtl;
struct dtl *dtl_end;
wait_queue_head_t *wqp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
int state;
int ptid;
bool timer_running;
wait_queue_head_t cpu_run;
struct kvm_vcpu_arch_shared *shared;
......@@ -428,8 +433,9 @@ struct kvm_vcpu_arch {
#endif
};
#define KVMPPC_VCPU_BUSY_IN_HOST 0
#define KVMPPC_VCPU_BLOCKED 1
/* Values for vcpu->arch.state */
#define KVMPPC_VCPU_STOPPED 0
#define KVMPPC_VCPU_BUSY_IN_HOST 1
#define KVMPPC_VCPU_RUNNABLE 2
#endif /* __POWERPC_KVM_HOST_H__ */
......@@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
/* Core-specific hooks */
......
......@@ -44,6 +44,7 @@
#include <asm/compat.h>
#include <asm/mmu.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
#endif
#ifdef CONFIG_PPC_ISERIES
#include <asm/iseries/alpaca.h>
......@@ -449,8 +450,6 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
......@@ -458,14 +457,12 @@ int main(void)
DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
......@@ -481,6 +478,7 @@ int main(void)
DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
offsetof(struct kvmppc_vcpu_book3s, vcpu));
DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
......@@ -537,6 +535,8 @@ int main(void)
HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
HSTATE_FIELD(HSTATE_NAPPING, napping);
#ifdef CONFIG_KVM_BOOK3S_64_HV
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
......@@ -549,6 +549,7 @@ int main(void)
HSTATE_FIELD(HSTATE_DSCR, host_dscr);
HSTATE_FIELD(HSTATE_DABR, dabr);
HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
DEFINE(IPI_PRIORITY, IPI_PRIORITY);
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#else /* CONFIG_PPC_BOOK3S */
......
......@@ -427,16 +427,6 @@ slb_miss_user_pseries:
b . /* prevent spec. execution */
#endif /* __DISABLED__ */
/* KVM's trampoline code needs to be close to the interrupt handlers */
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#ifdef CONFIG_KVM_BOOK3S_PR
#include "../kvm/book3s_rmhandlers.S"
#else
#include "../kvm/book3s_hv_rmhandlers.S"
#endif
#endif
.align 7
.globl __end_interrupts
__end_interrupts:
......
......@@ -78,6 +78,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
vcpu_44x->shadow_refs[i].gtlb_index = -1;
vcpu->arch.cpu_type = KVM_CPU_440;
return 0;
}
......
......@@ -43,18 +43,22 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
fpu.o \
book3s_paired_singles.o \
book3s_pr.o \
book3s_pr_papr.o \
book3s_emulate.o \
book3s_interrupts.o \
book3s_mmu_hpte.o \
book3s_64_mmu_host.o \
book3s_64_mmu.o \
book3s_32_mmu.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
book3s_rmhandlers.o
kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv.o \
book3s_hv_interrupts.o \
book3s_64_mmu_hv.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
book3s_hv_builtin.o
......
......@@ -31,7 +31,7 @@
* R1 = host R1
* R2 = host R2
* R3 = shadow vcpu
* all other volatile GPRS = free
* all other volatile GPRS = free except R4, R6
* SVCPU[CR] = guest CR
* SVCPU[XER] = guest XER
* SVCPU[CTR] = guest CTR
......
......@@ -128,7 +128,13 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
page, vcpu_book3s->sdr1, pteg, slbe->vsid);
r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
/* When running a PAPR guest, SDR1 contains a HVA address instead
of a GPA */
if (vcpu_book3s->vcpu.arch.papr_enabled)
r = pteg;
else
r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
if (kvm_is_error_hva(r))
return r;
return r | (pteg & ~PAGE_MASK);
......
......@@ -53,7 +53,7 @@ slb_exit_skip_ ## num:
* R1 = host R1
* R2 = host R2
* R3 = shadow vcpu
* all other volatile GPRS = free
* all other volatile GPRS = free except R4, R6
* SVCPU[CR] = guest CR
* SVCPU[XER] = guest XER
* SVCPU[CTR] = guest CTR
......
......@@ -63,6 +63,25 @@
* function pointers, so let's just disable the define. */
#undef mfsrin
enum priv_level {
PRIV_PROBLEM = 0,
PRIV_SUPER = 1,
PRIV_HYPER = 2,
};
static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
{
/* PAPR VMs only access supervisor SPRs */
if (vcpu->arch.papr_enabled && (level > PRIV_SUPER))
return false;
/* Limit user space to its own small SPR set */
if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM)
return false;
return true;
}
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
......@@ -296,6 +315,8 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
switch (sprn) {
case SPRN_SDR1:
if (!spr_allowed(vcpu, PRIV_HYPER))
goto unprivileged;
to_book3s(vcpu)->sdr1 = spr_val;
break;
case SPRN_DSISR:
......@@ -390,6 +411,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
break;
unprivileged:
default:
printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
#ifndef DEBUG_SPR
......@@ -421,6 +443,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
break;
}
case SPRN_SDR1:
if (!spr_allowed(vcpu, PRIV_HYPER))
goto unprivileged;
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
break;
case SPRN_DSISR:
......@@ -449,6 +473,10 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_HID5:
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
break;
case SPRN_CFAR:
case SPRN_PURR:
kvmppc_set_gpr(vcpu, rt, 0);
break;
case SPRN_GQR0:
case SPRN_GQR1:
case SPRN_GQR2:
......@@ -476,6 +504,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
kvmppc_set_gpr(vcpu, rt, 0);
break;
default:
unprivileged:
printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
#ifndef DEBUG_SPR
emulated = EMULATE_FAIL;
......
......@@ -23,9 +23,7 @@
#ifdef CONFIG_KVM_BOOK3S_64_HV
EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
#else
EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter);
EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline);
EXPORT_SYMBOL_GPL(kvmppc_rmcall);
EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
......
This diff is collapsed.
......@@ -110,39 +110,6 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
return H_SUCCESS;
}
static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index)
{
unsigned long rb, va_low;
rb = (v & ~0x7fUL) << 16; /* AVA field */
va_low = pte_index >> 3;
if (v & HPTE_V_SECONDARY)
va_low = ~va_low;
/* xor vsid from AVA */
if (!(v & HPTE_V_1TB_SEG))
va_low ^= v >> 12;
else
va_low ^= v >> 24;
va_low &= 0x7ff;
if (v & HPTE_V_LARGE) {
rb |= 1; /* L field */
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
(r & 0xff000)) {
/* non-16MB large page, must be 64k */
/* (masks depend on page size) */
rb |= 0x1000; /* page encoding in LP field */
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
}
} else {
/* 4kB page */
rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
}
rb |= (v >> 54) & 0x300; /* B field */
return rb;
}
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
static inline int try_lock_tlbie(unsigned int *lock)
......
This diff is collapsed.
......@@ -29,27 +29,11 @@
#define ULONG_SIZE 8
#define FUNC(name) GLUE(.,name)
#define GET_SHADOW_VCPU_R13
#define DISABLE_INTERRUPTS \
mfmsr r0; \
rldicl r0,r0,48,1; \
rotldi r0,r0,16; \
mtmsrd r0,1; \
#elif defined(CONFIG_PPC_BOOK3S_32)
#define ULONG_SIZE 4
#define FUNC(name) name
#define GET_SHADOW_VCPU_R13 \
lwz r13, (THREAD + THREAD_KVM_SVCPU)(r2)
#define DISABLE_INTERRUPTS \
mfmsr r0; \
rlwinm r0,r0,0,17,15; \
mtmsr r0; \
#endif /* CONFIG_PPC_BOOK3S_XX */
......@@ -108,44 +92,17 @@ kvm_start_entry:
kvm_start_lightweight:
GET_SHADOW_VCPU_R13
PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4)
PPC_STL r3, HSTATE_VMHANDLER(r13)
PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
DISABLE_INTERRUPTS
#ifdef CONFIG_PPC_BOOK3S_64
/* Some guests may need to have dcbz set to 32 byte length.
*
* Usually we ensure that by patching the guest's instructions
* to trap on dcbz and emulate it in the hypervisor.
*
* If we can, we should tell the CPU to use 32 byte dcbz though,
* because that's a lot faster.
*/
PPC_LL r3, VCPU_HFLAGS(r4)
rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */
beq no_dcbz32_on
mfspr r3,SPRN_HID5
ori r3, r3, 0x80 /* XXX HID5_dcbz32 = 0x80 */
mtspr SPRN_HID5,r3
no_dcbz32_on:
rldicl r3, r3, 0, 63 /* r3 &= 1 */
stb r3, HSTATE_RESTORE_HID5(r13)
#endif /* CONFIG_PPC_BOOK3S_64 */
PPC_LL r6, VCPU_RMCALL(r4)
mtctr r6
PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4)
LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
/* Jump to segment patching handler and into our guest */
bctr
bl FUNC(kvmppc_entry_trampoline)
nop
/*
* This is the handler in module memory. It gets jumped at from the
......@@ -170,21 +127,6 @@ kvmppc_handler_highmem:
/* R7 = vcpu */
PPC_LL r7, GPR4(r1)
#ifdef CONFIG_PPC_BOOK3S_64
PPC_LL r5, VCPU_HFLAGS(r7)
rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
beq no_dcbz32_off
li r4, 0
mfspr r5,SPRN_HID5
rldimi r5,r4,6,56
mtspr SPRN_HID5,r5
no_dcbz32_off:
#endif /* CONFIG_PPC_BOOK3S_64 */
PPC_STL r14, VCPU_GPR(r14)(r7)
PPC_STL r15, VCPU_GPR(r15)(r7)
PPC_STL r16, VCPU_GPR(r16)(r7)
......@@ -204,67 +146,6 @@ no_dcbz32_off:
PPC_STL r30, VCPU_GPR(r30)(r7)
PPC_STL r31, VCPU_GPR(r31)(r7)
/* Restore host msr -> SRR1 */
PPC_LL r6, VCPU_HOST_MSR(r7)
/*
* For some interrupts, we need to call the real Linux
* handler, so it can do work for us. This has to happen
* as if the interrupt arrived from the kernel though,
* so let's fake it here where most state is restored.
*
* Call Linux for hardware interrupts/decrementer
* r3 = address of interrupt handler (exit reason)
*/
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq call_linux_handler
cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
beq call_linux_handler
cmpwi r12, BOOK3S_INTERRUPT_PERFMON
beq call_linux_handler
/* Back to EE=1 */
mtmsr r6
sync
b kvm_return_point
call_linux_handler:
/*
* If we land here we need to jump back to the handler we
* came from.
*
* We have a page that we can access from real mode, so let's
* jump back to that and use it as a trampoline to get back into the
* interrupt handler!
*
* R3 still contains the exit code,
* R5 VCPU_HOST_RETIP and
* R6 VCPU_HOST_MSR
*/
/* Restore host IP -> SRR0 */
PPC_LL r5, VCPU_HOST_RETIP(r7)
/* XXX Better move to a safe function?
* What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
mtlr r12
PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7)
mtsrr0 r4
LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
mtsrr1 r3
RFI
.global kvm_return_point
kvm_return_point:
/* Jump back to lightweight entry if we're supposed to */
/* go back into the guest */
/* Pass the exit number as 3rd argument to kvmppc_handle_exit */
mr r5, r12
......
......@@ -150,16 +150,22 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
#ifdef CONFIG_PPC_BOOK3S_64
if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
kvmppc_mmu_book3s_64_init(vcpu);
to_book3s(vcpu)->hior = 0xfff00000;
if (!to_book3s(vcpu)->hior_sregs)
to_book3s(vcpu)->hior = 0xfff00000;
to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
} else
#endif
{
kvmppc_mmu_book3s_32_init(vcpu);
to_book3s(vcpu)->hior = 0;
if (!to_book3s(vcpu)->hior_sregs)
to_book3s(vcpu)->hior = 0;
to_book3s(vcpu)->msr_mask = 0xffffffffULL;
vcpu->arch.cpu_type = KVM_CPU_3S_32;
}
kvmppc_sanity_check(vcpu);
/* If we are in hypervisor level on 970, we can tell the CPU to
* treat DCBZ as 32 bytes store */
vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
......@@ -646,7 +652,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
case BOOK3S_INTERRUPT_SYSCALL:
if (vcpu->arch.osi_enabled &&
if (vcpu->arch.papr_enabled &&
(kvmppc_get_last_inst(vcpu) == 0x44000022) &&
!(vcpu->arch.shared->msr & MSR_PR)) {
/* SC 1 papr hypercalls */
ulong cmd = kvmppc_get_gpr(vcpu, 3);
int i;
if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
r = RESUME_GUEST;
break;
}
run->papr_hcall.nr = cmd;
for (i = 0; i < 9; ++i) {
ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
run->papr_hcall.args[i] = gpr;
}
run->exit_reason = KVM_EXIT_PAPR_HCALL;
vcpu->arch.hcall_needed = 1;
r = RESUME_HOST;
} else if (vcpu->arch.osi_enabled &&
(((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
(((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
/* MOL hypercalls */
......@@ -770,6 +796,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
}
}
if (sregs->u.s.flags & KVM_SREGS_S_HIOR)
sregs->u.s.hior = to_book3s(vcpu)->hior;
return 0;
}
......@@ -806,6 +835,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
/* Flush the MMU after messing with the segments */
kvmppc_mmu_pte_flush(vcpu, 0, 0);
if (sregs->u.s.flags & KVM_SREGS_S_HIOR) {
to_book3s(vcpu)->hior_sregs = true;
to_book3s(vcpu)->hior = sregs->u.s.hior;
}
return 0;
}
......@@ -841,8 +875,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (!p)
goto uninit_vcpu;
vcpu->arch.host_retip = kvm_return_point;
vcpu->arch.host_msr = mfmsr();
#ifdef CONFIG_PPC_BOOK3S_64
/* default to book3s_64 (970fx) */
vcpu->arch.pvr = 0x3C0301;
......@@ -853,16 +885,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
vcpu->arch.slb_nr = 64;
/* remember where some real-mode handlers are */
vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline);
vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter);
vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
#ifdef CONFIG_PPC_BOOK3S_64
vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
#else
vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
#endif
vcpu->arch.shadow_msr = MSR_USER64;
err = kvmppc_mmu_init(vcpu);
......@@ -908,6 +930,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif
ulong ext_msr;
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
/* No need to go into the guest when all we do is going out */
if (signal_pending(current)) {
kvm_run->exit_reason = KVM_EXIT_INTR;
......
/*
* Copyright (C) 2011. Freescale Inc. All rights reserved.
*
* Authors:
* Alexander Graf <agraf@suse.de>
* Paul Mackerras <paulus@samba.org>
*
* Description:
*
* Hypercall handling for running PAPR guests in PR KVM on Book 3S
* processors.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*/
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
unsigned long pteg_addr;
pte_index <<= 4;
pte_index &= ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70;
pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
pteg_addr |= pte_index;
return pteg_addr;
}
static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
{
long flags = kvmppc_get_gpr(vcpu, 4);
long pte_index = kvmppc_get_gpr(vcpu, 5);
unsigned long pteg[2 * 8];
unsigned long pteg_addr, i, *hpte;
pte_index &= ~7UL;
pteg_addr = get_pteg_addr(vcpu, pte_index);
copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
hpte = pteg;
if (likely((flags & H_EXACT) == 0)) {
pte_index &= ~7UL;
for (i = 0; ; ++i) {
if (i == 8)
return H_PTEG_FULL;
if ((*hpte & HPTE_V_VALID) == 0)
break;
hpte += 2;
}
} else {
i = kvmppc_get_gpr(vcpu, 5) & 7UL;
hpte += i * 2;
}
hpte[0] = kvmppc_get_gpr(vcpu, 6);
hpte[1] = kvmppc_get_gpr(vcpu, 7);
copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg));
kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
kvmppc_set_gpr(vcpu, 4, pte_index | i);
return EMULATE_DONE;
}
static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
{
unsigned long flags= kvmppc_get_gpr(vcpu, 4);
unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
unsigned long v = 0, pteg, rb;
unsigned long pte[2];
pteg = get_pteg_addr(vcpu, pte_index);
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
if ((pte[0] & HPTE_V_VALID) == 0 ||
((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) {
kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
return EMULATE_DONE;
}
copy_to_user((void __user *)pteg, &v, sizeof(v));
rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
kvmppc_set_gpr(vcpu, 4, pte[0]);
kvmppc_set_gpr(vcpu, 5, pte[1]);
return EMULATE_DONE;
}
static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
{
unsigned long flags = kvmppc_get_gpr(vcpu, 4);
unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
unsigned long rb, pteg, r, v;
unsigned long pte[2];
pteg = get_pteg_addr(vcpu, pte_index);
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
if ((pte[0] & HPTE_V_VALID) == 0 ||
((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) {
kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
return EMULATE_DONE;
}
v = pte[0];
r = pte[1];
r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI |
HPTE_R_KEY_LO);
r |= (flags << 55) & HPTE_R_PP0;
r |= (flags << 48) & HPTE_R_KEY_HI;
r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
pte[1] = r;
rb = compute_tlbie_rb(v, r, pte_index);
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
copy_to_user((void __user *)pteg, pte, sizeof(pte));
kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
return EMULATE_DONE;
}
int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
{
switch (cmd) {
case H_ENTER:
return kvmppc_h_pr_enter(vcpu);
case H_REMOVE:
return kvmppc_h_pr_remove(vcpu);
case H_PROTECT:
return kvmppc_h_pr_protect(vcpu);
case H_BULK_REMOVE:
/* We just flush all PTEs, so user space can
handle the HPT modifications */
kvmppc_mmu_pte_flush(vcpu, 0, 0);
break;
case H_CEDE:
kvm_vcpu_block(vcpu);
vcpu->stat.halt_wakeup++;
return EMULATE_DONE;
}
return EMULATE_FAIL;
}
......@@ -20,6 +20,7 @@
#include <asm/ppc_asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/asm-offsets.h>
......@@ -35,10 +36,10 @@
#if defined(CONFIG_PPC_BOOK3S_64)
#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg)
#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR)
#define FUNC(name) GLUE(.,name)
#define MTMSR_EERI(reg) mtmsrd (reg),1
.globl kvmppc_skip_interrupt
kvmppc_skip_interrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
......@@ -51,6 +52,7 @@ kvmppc_skip_interrupt:
rfid
b .
.globl kvmppc_skip_Hinterrupt
kvmppc_skip_Hinterrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
......@@ -65,8 +67,8 @@ kvmppc_skip_Hinterrupt:
#elif defined(CONFIG_PPC_BOOK3S_32)
#define MSR_NOIRQ MSR_KERNEL
#define FUNC(name) name
#define MTMSR_EERI(reg) mtmsr (reg)
.macro INTERRUPT_TRAMPOLINE intno
......@@ -167,40 +169,24 @@ kvmppc_handler_skip_ins:
#endif
/*
* This trampoline brings us back to a real mode handler
*
* Input Registers:
*
* R5 = SRR0
* R6 = SRR1
* LR = real-mode IP
* Call kvmppc_handler_trampoline_enter in real mode
*
* On entry, r4 contains the guest shadow MSR
*/
.global kvmppc_handler_lowmem_trampoline
kvmppc_handler_lowmem_trampoline:
mtsrr0 r5
_GLOBAL(kvmppc_entry_trampoline)
mfmsr r5
LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
toreal(r7)
li r9, MSR_RI
ori r9, r9, MSR_EE
andc r9, r5, r9 /* Clear EE and RI in MSR value */
li r6, MSR_IR | MSR_DR
ori r6, r6, MSR_EE
andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */
MTMSR_EERI(r9) /* Clear EE and RI in MSR */
mtsrr0 r7 /* before we set srr0/1 */
mtsrr1 r6
blr
kvmppc_handler_lowmem_trampoline_end:
/*
* Call a function in real mode
*
* Input Registers:
*
* R3 = function
* R4 = MSR
* R5 = scratch register
*
*/
_GLOBAL(kvmppc_rmcall)
LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ)
mtmsr r5 /* Disable relocation and interrupts, so mtsrr
doesn't get interrupted */
sync
mtsrr0 r3
mtsrr1 r4
RFI
#if defined(CONFIG_PPC_BOOK3S_32)
......
......@@ -23,6 +23,7 @@
#define GET_SHADOW_VCPU(reg) \
mr reg, r13
#define MTMSR_EERI(reg) mtmsrd (reg),1
#elif defined(CONFIG_PPC_BOOK3S_32)
......@@ -30,6 +31,7 @@
tophys(reg, r2); \
lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \
tophys(reg, reg)
#define MTMSR_EERI(reg) mtmsr (reg)
#endif
......@@ -57,10 +59,12 @@ kvmppc_handler_trampoline_enter:
/* Required state:
*
* MSR = ~IR|DR
* R13 = PACA
* R1 = host R1
* R2 = host R2
* R10 = guest MSR
* R4 = guest shadow MSR
* R5 = normal host MSR
* R6 = current host MSR (EE, IR, DR off)
* LR = highmem guest exit code
* all other volatile GPRS = free
* SVCPU[CR] = guest CR
* SVCPU[XER] = guest XER
......@@ -71,15 +75,15 @@ kvmppc_handler_trampoline_enter:
/* r3 = shadow vcpu */
GET_SHADOW_VCPU(r3)
/* Save guest exit handler address and MSR */
mflr r0
PPC_STL r0, HSTATE_VMHANDLER(r3)
PPC_STL r5, HSTATE_HOST_MSR(r3)
/* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
PPC_STL r1, HSTATE_HOST_R1(r3)
PPC_STL r2, HSTATE_HOST_R2(r3)
/* Move SRR0 and SRR1 into the respective regs */
PPC_LL r9, SVCPU_PC(r3)
mtsrr0 r9
mtsrr1 r10
/* Activate guest mode, so faults get handled by KVM */
li r11, KVM_GUEST_MODE_GUEST
stb r11, HSTATE_IN_GUEST(r3)
......@@ -87,17 +91,46 @@ kvmppc_handler_trampoline_enter:
/* Switch to guest segment. This is subarch specific. */
LOAD_GUEST_SEGMENTS
#ifdef CONFIG_PPC_BOOK3S_64
/* Some guests may need to have dcbz set to 32 byte length.
*
* Usually we ensure that by patching the guest's instructions
* to trap on dcbz and emulate it in the hypervisor.
*
* If we can, we should tell the CPU to use 32 byte dcbz though,
* because that's a lot faster.
*/
lbz r0, HSTATE_RESTORE_HID5(r3)
cmpwi r0, 0
beq no_dcbz32_on
mfspr r0,SPRN_HID5
ori r0, r0, 0x80 /* XXX HID5_dcbz32 = 0x80 */
mtspr SPRN_HID5,r0
no_dcbz32_on:
#endif /* CONFIG_PPC_BOOK3S_64 */
/* Enter guest */
PPC_LL r4, SVCPU_CTR(r3)
PPC_LL r5, SVCPU_LR(r3)
lwz r6, SVCPU_CR(r3)
lwz r7, SVCPU_XER(r3)
PPC_LL r8, SVCPU_CTR(r3)
PPC_LL r9, SVCPU_LR(r3)
lwz r10, SVCPU_CR(r3)
lwz r11, SVCPU_XER(r3)
mtctr r8
mtlr r9
mtcr r10
mtxer r11
mtctr r4
mtlr r5
mtcr r6
mtxer r7
/* Move SRR0 and SRR1 into the respective regs */
PPC_LL r9, SVCPU_PC(r3)
/* First clear RI in our current MSR value */
li r0, MSR_RI
andc r6, r6, r0
MTMSR_EERI(r6)
mtsrr0 r9
mtsrr1 r4
PPC_LL r0, SVCPU_R0(r3)
PPC_LL r1, SVCPU_R1(r3)
......@@ -213,11 +246,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
beq ld_last_inst
cmpwi r12, BOOK3S_INTERRUPT_PROGRAM
beq ld_last_inst
cmpwi r12, BOOK3S_INTERRUPT_SYSCALL
beq ld_last_prev_inst
cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT
beq- ld_last_inst
b no_ld_last_inst
ld_last_prev_inst:
addi r3, r3, -4
ld_last_inst:
/* Save off the guest instruction we're at */
......@@ -254,6 +292,43 @@ no_ld_last_inst:
/* Switch back to host MMU */
LOAD_HOST_SEGMENTS
#ifdef CONFIG_PPC_BOOK3S_64
lbz r5, HSTATE_RESTORE_HID5(r13)
cmpwi r5, 0
beq no_dcbz32_off
li r4, 0
mfspr r5,SPRN_HID5
rldimi r5,r4,6,56
mtspr SPRN_HID5,r5
no_dcbz32_off:
#endif /* CONFIG_PPC_BOOK3S_64 */
/*
* For some interrupts, we need to call the real Linux
* handler, so it can do work for us. This has to happen
* as if the interrupt arrived from the kernel though,
* so let's fake it here where most state is restored.
*
* Having set up SRR0/1 with the address where we want
* to continue with relocation on (potentially in module
* space), we either just go straight there with rfi[d],
* or we jump to an interrupt handler with bctr if there
* is an interrupt to be handled first. In the latter
* case, the rfi[d] at the end of the interrupt handler
* will get us back to where we want to continue.
*/
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq 1f
cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
beq 1f
cmpwi r12, BOOK3S_INTERRUPT_PERFMON
1: mtctr r12
/* Register usage at this point:
*
* R1 = host R1
......@@ -264,13 +339,15 @@ no_ld_last_inst:
*
*/
/* RFI into the highmem handler */
mfmsr r7
ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */
mtsrr1 r7
/* Load highmem handler address */
PPC_LL r6, HSTATE_HOST_MSR(r13)
PPC_LL r8, HSTATE_VMHANDLER(r13)
/* Restore host msr -> SRR1 */
mtsrr1 r6
/* Load highmem handler address */
mtsrr0 r8
/* RFI into the highmem handler, or jump to interrupt handler */
beqctr
RFI
kvmppc_handler_trampoline_exit_end:
......@@ -316,6 +316,11 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int ret;
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
local_irq_disable();
kvm_guest_enter();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
......@@ -618,6 +623,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int i;
int r;
vcpu->arch.pc = 0;
vcpu->arch.shared->msr = 0;
......@@ -634,7 +640,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvmppc_init_timing_stats(vcpu);
return kvmppc_core_vcpu_setup(vcpu);
r = kvmppc_core_vcpu_setup(vcpu);
kvmppc_sanity_check(vcpu);
return r;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
......
......@@ -73,6 +73,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
vcpu->vcpu_id = 0;
vcpu->arch.cpu_type = KVM_CPU_E500V2;
return 0;
}
......
......@@ -39,12 +39,8 @@
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
#ifndef CONFIG_KVM_BOOK3S_64_HV
return !(v->arch.shared->msr & MSR_WE) ||
!!(v->arch.pending_exceptions);
#else
return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
#endif
}
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
......@@ -95,6 +91,31 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
return r;
}
int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
{
int r = false;
/* We have to know what CPU to virtualize */
if (!vcpu->arch.pvr)
goto out;
/* PAPR only works with book3s_64 */
if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
goto out;
#ifdef CONFIG_KVM_BOOK3S_64_HV
/* HV KVM can only do PAPR mode for now */
if (!vcpu->arch.papr_enabled)
goto out;
#endif
r = true;
out:
vcpu->arch.sane = r;
return r ? 0 : -EINVAL;
}
int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
enum emulation_result er;
......@@ -188,6 +209,8 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_BOOKE_SREGS:
#else
case KVM_CAP_PPC_SEGSTATE:
case KVM_CAP_PPC_HIOR:
case KVM_CAP_PPC_PAPR:
#endif
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
......@@ -258,6 +281,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvm_vcpu *vcpu;
vcpu = kvmppc_core_vcpu_create(kvm, id);
vcpu->arch.wqp = &vcpu->wq;
if (!IS_ERR(vcpu))
kvmppc_create_vcpu_debugfs(vcpu, id);
return vcpu;
......@@ -289,8 +313,8 @@ static void kvmppc_decrementer_func(unsigned long data)
kvmppc_core_queue_dec(vcpu);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
if (waitqueue_active(vcpu->arch.wqp)) {
wake_up_interruptible(vcpu->arch.wqp);
vcpu->stat.halt_wakeup++;
}
}
......@@ -543,13 +567,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
if (irq->irq == KVM_INTERRUPT_UNSET)
if (irq->irq == KVM_INTERRUPT_UNSET) {
kvmppc_core_dequeue_external(vcpu, irq);
else
kvmppc_core_queue_external(vcpu, irq);
return 0;
}
kvmppc_core_queue_external(vcpu, irq);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
if (waitqueue_active(vcpu->arch.wqp)) {
wake_up_interruptible(vcpu->arch.wqp);
vcpu->stat.halt_wakeup++;
} else if (vcpu->cpu != -1) {
smp_send_reschedule(vcpu->cpu);
......@@ -571,11 +597,18 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.osi_enabled = true;
break;
case KVM_CAP_PPC_PAPR:
r = 0;
vcpu->arch.papr_enabled = true;
break;
default:
r = -EINVAL;
break;
}
if (!r)
r = kvmppc_sanity_check(vcpu);
return r;
}
......
......@@ -119,6 +119,7 @@ struct kvm_vcpu_stat {
u32 instruction_lctlg;
u32 exit_program_interruption;
u32 exit_instr_and_program;
u32 deliver_external_call;
u32 deliver_emergency_signal;
u32 deliver_service_signal;
u32 deliver_virtio_interrupt;
......@@ -138,6 +139,7 @@ struct kvm_vcpu_stat {
u32 instruction_stfl;
u32 instruction_tprot;
u32 instruction_sigp_sense;
u32 instruction_sigp_external_call;
u32 instruction_sigp_emergency;
u32 instruction_sigp_stop;
u32 instruction_sigp_arch;
......@@ -174,6 +176,10 @@ struct kvm_s390_prefix_info {
__u32 address;
};
struct kvm_s390_extcall_info {
__u16 code;
};
struct kvm_s390_emerg_info {
__u16 code;
};
......@@ -186,6 +192,7 @@ struct kvm_s390_interrupt_info {
struct kvm_s390_ext_info ext;
struct kvm_s390_pgm_info pgm;
struct kvm_s390_emerg_info emerg;
struct kvm_s390_extcall_info extcall;
struct kvm_s390_prefix_info prefix;
};
};
......
......@@ -38,6 +38,11 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
switch (inti->type) {
case KVM_S390_INT_EXTERNAL_CALL:
if (psw_extint_disabled(vcpu))
return 0;
if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
return 1;
case KVM_S390_INT_EMERGENCY:
if (psw_extint_disabled(vcpu))
return 0;
......@@ -98,6 +103,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
switch (inti->type) {
case KVM_S390_INT_EXTERNAL_CALL:
case KVM_S390_INT_EMERGENCY:
case KVM_S390_INT_SERVICE:
case KVM_S390_INT_VIRTIO:
......@@ -143,6 +149,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
exception = 1;
break;
case KVM_S390_INT_EXTERNAL_CALL:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
vcpu->stat.deliver_external_call++;
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
if (rc == -EFAULT)
exception = 1;
rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code);
if (rc == -EFAULT)
exception = 1;
rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
if (rc == -EFAULT)
exception = 1;
break;
case KVM_S390_INT_SERVICE:
VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
inti->ext.ext_params);
......@@ -522,6 +550,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
break;
case KVM_S390_PROGRAM_INT:
case KVM_S390_SIGP_STOP:
case KVM_S390_INT_EXTERNAL_CALL:
case KVM_S390_INT_EMERGENCY:
default:
kfree(inti);
......@@ -581,6 +610,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
break;
case KVM_S390_SIGP_STOP:
case KVM_S390_RESTART:
case KVM_S390_INT_EXTERNAL_CALL:
case KVM_S390_INT_EMERGENCY:
VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
inti->type = s390int->type;
......
......@@ -46,6 +46,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
......@@ -64,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
......@@ -175,6 +177,8 @@ int kvm_arch_init_vm(struct kvm *kvm)
if (rc)
goto out_err;
rc = -ENOMEM;
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
goto out_err;
......@@ -312,11 +316,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
int rc = -ENOMEM;
struct kvm_vcpu *vcpu;
int rc = -EINVAL;
if (id >= KVM_MAX_VCPUS)
goto out;
rc = -ENOMEM;
vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
if (!vcpu)
goto out_nomem;
goto out;
vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
get_zeroed_page(GFP_KERNEL);
......@@ -352,7 +362,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
kfree(vcpu);
out_nomem:
out:
return ERR_PTR(rc);
}
......@@ -386,6 +396,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
{
memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
restore_access_regs(vcpu->arch.guest_acrs);
return 0;
}
......@@ -401,6 +412,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
vcpu->arch.guest_fpregs.fpc = fpu->fpc;
restore_fp_regs(&vcpu->arch.guest_fpregs);
return 0;
}
......
......@@ -87,6 +87,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
return -ENOMEM;
inti->type = KVM_S390_INT_EMERGENCY;
inti->emerg.code = vcpu->vcpu_id;
spin_lock(&fi->lock);
li = fi->local_int[cpu_addr];
......@@ -103,9 +104,47 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
wake_up_interruptible(&li->wq);
spin_unlock_bh(&li->lock);
rc = 0; /* order accepted */
VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
unlock:
spin_unlock(&fi->lock);
return rc;
}
static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
struct kvm_s390_interrupt_info *inti;
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
return 3; /* not operational */
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
return -ENOMEM;
inti->type = KVM_S390_INT_EXTERNAL_CALL;
inti->extcall.code = vcpu->vcpu_id;
spin_lock(&fi->lock);
li = fi->local_int[cpu_addr];
if (li == NULL) {
rc = 3; /* not operational */
kfree(inti);
goto unlock;
}
spin_lock_bh(&li->lock);
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
if (waitqueue_active(&li->wq))
wake_up_interruptible(&li->wq);
spin_unlock_bh(&li->lock);
rc = 0; /* order accepted */
VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
unlock:
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
return rc;
}
......@@ -267,6 +306,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
rc = __sigp_sense(vcpu, cpu_addr,
&vcpu->arch.guest_gprs[r1]);
break;
case SIGP_EXTERNAL_CALL:
vcpu->stat.instruction_sigp_external_call++;
rc = __sigp_external_call(vcpu, cpu_addr);
break;
case SIGP_EMERGENCY:
vcpu->stat.instruction_sigp_emergency++;
rc = __sigp_emergency(vcpu, cpu_addr);
......
......@@ -100,7 +100,9 @@
#define APIC_TIMER_BASE_CLKIN 0x0
#define APIC_TIMER_BASE_TMBASE 0x1
#define APIC_TIMER_BASE_DIV 0x2
#define APIC_LVT_TIMER_ONESHOT (0 << 17)
#define APIC_LVT_TIMER_PERIODIC (1 << 17)
#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
#define APIC_LVT_MASKED (1 << 16)
#define APIC_LVT_LEVEL_TRIGGER (1 << 15)
#define APIC_LVT_REMOTE_IRR (1 << 14)
......
......@@ -121,6 +121,7 @@
#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */
#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */
#define X86_FEATURE_AES (4*32+25) /* AES instructions */
#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
......
......@@ -262,7 +262,7 @@ struct x86_emulate_ctxt {
struct operand dst;
bool has_seg_override;
u8 seg_override;
unsigned int d;
u64 d;
int (*execute)(struct x86_emulate_ctxt *ctxt);
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
/* modrm */
......@@ -275,6 +275,8 @@ struct x86_emulate_ctxt {
unsigned long _eip;
/* Fields above regs are cleared together. */
unsigned long regs[NR_VCPU_REGS];
struct operand memop;
struct operand *memopp;
struct fetch_cache fetch;
struct read_cache io_read;
struct read_cache mem_read;
......
......@@ -26,7 +26,8 @@
#include <asm/mtrr.h>
#include <asm/msr-index.h>
#define KVM_MAX_VCPUS 64
#define KVM_MAX_VCPUS 254
#define KVM_SOFT_MAX_VCPUS 64
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
......@@ -264,6 +265,7 @@ struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
bool prefault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
......@@ -411,8 +413,9 @@ struct kvm_vcpu_arch {
u32 tsc_catchup_mult;
s8 tsc_catchup_shift;
bool nmi_pending;
bool nmi_injected;
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */
bool nmi_injected; /* Trying to inject an NMI this entry */
struct mtrr_state_type mtrr_state;
u32 pat;
......@@ -628,14 +631,13 @@ struct kvm_x86_ops {
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu);
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
const struct trace_print_flags *exit_reasons_str;
};
struct kvm_arch_async_pf {
......@@ -672,6 +674,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
extern bool tdp_enabled;
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
/* control of guest tsc rate supported? */
extern bool kvm_has_tsc_control;
/* minimum supported tsc_khz for guests */
......
......@@ -229,6 +229,8 @@
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
#define MSR_IA32_TSCDEADLINE 0x000006e0
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
......
......@@ -350,6 +350,18 @@ enum vmcs_field {
#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
/*
* Exit Qualifications for APIC-Access
*/
#define APIC_ACCESS_OFFSET 0xfff /* 11:0, offset within the APIC page */
#define APIC_ACCESS_TYPE 0xf000 /* 15:12, access type */
#define TYPE_LINEAR_APIC_INST_READ (0 << 12)
#define TYPE_LINEAR_APIC_INST_WRITE (1 << 12)
#define TYPE_LINEAR_APIC_INST_FETCH (2 << 12)
#define TYPE_LINEAR_APIC_EVENT (3 << 12)
#define TYPE_PHYSICAL_APIC_EVENT (10 << 12)
#define TYPE_PHYSICAL_APIC_INST (15 << 12)
/* segment AR */
#define SEGMENT_AR_L_MASK (1 << 13)
......
This diff is collapsed.
......@@ -713,14 +713,16 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
KVM_PIT_MEM_LENGTH, &pit->dev);
if (ret < 0)
goto fail;
if (flags & KVM_PIT_SPEAKER_DUMMY) {
kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
&pit->speaker_dev);
KVM_SPEAKER_BASE_ADDRESS, 4,
&pit->speaker_dev);
if (ret < 0)
goto fail_unregister;
}
......
......@@ -34,6 +34,9 @@
#include <linux/kvm_host.h>
#include "trace.h"
#define pr_pic_unimpl(fmt, ...) \
pr_err_ratelimited("kvm: pic: " fmt, ## __VA_ARGS__)
static void pic_irq_request(struct kvm *kvm, int level);
static void pic_lock(struct kvm_pic *s)
......@@ -306,10 +309,10 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
}
s->init_state = 1;
if (val & 0x02)
printk(KERN_ERR "single mode not supported");
pr_pic_unimpl("single mode not supported");
if (val & 0x08)
printk(KERN_ERR
"level sensitive irq not supported");
pr_pic_unimpl(
"level sensitive irq not supported");
} else if (val & 0x08) {
if (val & 0x04)
s->poll = 1;
......@@ -459,22 +462,15 @@ static int picdev_in_range(gpa_t addr)
}
}
static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
{
return container_of(dev, struct kvm_pic, dev);
}
static int picdev_write(struct kvm_io_device *this,
static int picdev_write(struct kvm_pic *s,
gpa_t addr, int len, const void *val)
{
struct kvm_pic *s = to_pic(this);
unsigned char data = *(unsigned char *)val;
if (!picdev_in_range(addr))
return -EOPNOTSUPP;
if (len != 1) {
if (printk_ratelimit())
printk(KERN_ERR "PIC: non byte write\n");
pr_pic_unimpl("non byte write\n");
return 0;
}
pic_lock(s);
......@@ -494,17 +490,15 @@ static int picdev_write(struct kvm_io_device *this,
return 0;
}
static int picdev_read(struct kvm_io_device *this,
static int picdev_read(struct kvm_pic *s,
gpa_t addr, int len, void *val)
{
struct kvm_pic *s = to_pic(this);
unsigned char data = 0;
if (!picdev_in_range(addr))
return -EOPNOTSUPP;
if (len != 1) {
if (printk_ratelimit())
printk(KERN_ERR "PIC: non byte read\n");
pr_pic_unimpl("non byte read\n");
return 0;
}
pic_lock(s);
......@@ -525,6 +519,48 @@ static int picdev_read(struct kvm_io_device *this,
return 0;
}
static int picdev_master_write(struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
return picdev_write(container_of(dev, struct kvm_pic, dev_master),
addr, len, val);
}
static int picdev_master_read(struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
return picdev_read(container_of(dev, struct kvm_pic, dev_master),
addr, len, val);
}
static int picdev_slave_write(struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
addr, len, val);
}
static int picdev_slave_read(struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
addr, len, val);
}
static int picdev_eclr_write(struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
addr, len, val);
}
static int picdev_eclr_read(struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
addr, len, val);
}
/*
* callback when PIC0 irq status changed
*/
......@@ -537,9 +573,19 @@ static void pic_irq_request(struct kvm *kvm, int level)
s->output = level;
}
static const struct kvm_io_device_ops picdev_ops = {
.read = picdev_read,
.write = picdev_write,
static const struct kvm_io_device_ops picdev_master_ops = {
.read = picdev_master_read,
.write = picdev_master_write,
};
static const struct kvm_io_device_ops picdev_slave_ops = {
.read = picdev_slave_read,
.write = picdev_slave_write,
};
static const struct kvm_io_device_ops picdev_eclr_ops = {
.read = picdev_eclr_read,
.write = picdev_eclr_write,
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm)
......@@ -560,16 +606,39 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
/*
* Initialize PIO device
*/
kvm_iodevice_init(&s->dev, &picdev_ops);
kvm_iodevice_init(&s->dev_master, &picdev_master_ops);
kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops);
kvm_iodevice_init(&s->dev_eclr, &picdev_eclr_ops);
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,
&s->dev_master);
if (ret < 0)
goto fail_unlock;
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
if (ret < 0)
goto fail_unreg_2;
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
if (ret < 0)
goto fail_unreg_1;
mutex_unlock(&kvm->slots_lock);
if (ret < 0) {
kfree(s);
return NULL;
}
return s;
fail_unreg_1:
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
fail_unreg_2:
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);
fail_unlock:
mutex_unlock(&kvm->slots_lock);
kfree(s);
return NULL;
}
void kvm_destroy_pic(struct kvm *kvm)
......@@ -577,7 +646,9 @@ void kvm_destroy_pic(struct kvm *kvm)
struct kvm_pic *vpic = kvm->arch.vpic;
if (vpic) {
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
kvm->arch.vpic = NULL;
kfree(vpic);
}
......
......@@ -66,7 +66,9 @@ struct kvm_pic {
struct kvm *kvm;
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
int output; /* intr from master PIC */
struct kvm_io_device dev;
struct kvm_io_device dev_master;
struct kvm_io_device dev_slave;
struct kvm_io_device dev_eclr;
void (*ack_notifier)(void *opaque, int irq);
unsigned long irq_states[16];
};
......
......@@ -45,13 +45,6 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
return vcpu->arch.walk_mmu->pdptrs[index];
}
static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index)
{
load_pdptrs(vcpu, mmu, mmu->get_cr3(vcpu));
return mmu->pdptrs[index];
}
static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
{
ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
......
......@@ -2,6 +2,8 @@
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
u32 timer_mode_mask;
u64 tscdeadline;
atomic_t pending; /* accumulated triggered timers */
bool reinject;
struct kvm_timer_ops *t_ops;
......
......@@ -68,6 +68,9 @@
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)
static unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
{
return *((u32 *) (apic->regs + reg_off));
......@@ -135,9 +138,23 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
return ((apic_get_reg(apic, APIC_LVTT) &
apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
}
static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
return ((apic_get_reg(apic, APIC_LVTT) &
apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
}
static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
return ((apic_get_reg(apic, APIC_LVTT) &
apic->lapic_timer.timer_mode_mask) ==
APIC_LVT_TIMER_TSCDEADLINE);
}
static inline int apic_lvt_nmi_mode(u32 lvt_val)
......@@ -166,7 +183,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
}
static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
LVT_MASK | APIC_MODE_MASK, /* LVTPC */
LINT_MASK, LINT_MASK, /* LVT0-1 */
......@@ -316,8 +333,8 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
result = 1;
break;
default:
printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n",
apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
apic_debug("Bad DFR vcpu %d: %08x\n",
apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
break;
}
......@@ -354,8 +371,8 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
result = (target != source);
break;
default:
printk(KERN_WARNING "Bad dest shorthand value %x\n",
short_hand);
apic_debug("kvm: apic: Bad dest shorthand value %x\n",
short_hand);
break;
}
......@@ -401,11 +418,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
break;
case APIC_DM_REMRD:
printk(KERN_DEBUG "Ignoring delivery mode 3\n");
apic_debug("Ignoring delivery mode 3\n");
break;
case APIC_DM_SMI:
printk(KERN_DEBUG "Ignoring guest SMI\n");
apic_debug("Ignoring guest SMI\n");
break;
case APIC_DM_NMI:
......@@ -565,11 +582,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
val = kvm_apic_id(apic) << 24;
break;
case APIC_ARBPRI:
printk(KERN_WARNING "Access APIC ARBPRI register "
"which is for P6\n");
apic_debug("Access APIC ARBPRI register which is for P6\n");
break;
case APIC_TMCCT: /* Timer CCR */
if (apic_lvtt_tscdeadline(apic))
return 0;
val = apic_get_tmcct(apic);
break;
......@@ -664,29 +683,40 @@ static void update_divide_count(struct kvm_lapic *apic)
static void start_apic_timer(struct kvm_lapic *apic)
{
ktime_t now = apic->lapic_timer.timer.base->get_time();
apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
APIC_BUS_CYCLE_NS * apic->divide_count;
ktime_t now;
atomic_set(&apic->lapic_timer.pending, 0);
if (!apic->lapic_timer.period)
return;
/*
* Do not allow the guest to program periodic timers with small
* interval, since the hrtimers are not throttled by the host
* scheduler.
*/
if (apic_lvtt_period(apic)) {
if (apic->lapic_timer.period < NSEC_PER_MSEC/2)
apic->lapic_timer.period = NSEC_PER_MSEC/2;
}
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
/* lapic timer in oneshot or peroidic mode */
now = apic->lapic_timer.timer.base->get_time();
apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
* APIC_BUS_CYCLE_NS * apic->divide_count;
if (!apic->lapic_timer.period)
return;
/*
* Do not allow the guest to program periodic timers with small
* interval, since the hrtimers are not throttled by the host
* scheduler.
*/
if (apic_lvtt_period(apic)) {
s64 min_period = min_timer_period_us * 1000LL;
if (apic->lapic_timer.period < min_period) {
pr_info_ratelimited(
"kvm: vcpu %i: requested %lld ns "
"lapic timer period limited to %lld ns\n",
apic->vcpu->vcpu_id,
apic->lapic_timer.period, min_period);
apic->lapic_timer.period = min_period;
}
}
hrtimer_start(&apic->lapic_timer.timer,
ktime_add_ns(now, apic->lapic_timer.period),
HRTIMER_MODE_ABS);
hrtimer_start(&apic->lapic_timer.timer,
ktime_add_ns(now, apic->lapic_timer.period),
HRTIMER_MODE_ABS);
apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
PRIx64 ", "
"timer initial count 0x%x, period %lldns, "
"expire @ 0x%016" PRIx64 ".\n", __func__,
......@@ -695,6 +725,30 @@ static void start_apic_timer(struct kvm_lapic *apic)
apic->lapic_timer.period,
ktime_to_ns(ktime_add_ns(now,
apic->lapic_timer.period)));
} else if (apic_lvtt_tscdeadline(apic)) {
/* lapic timer in tsc deadline mode */
u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
u64 ns = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu);
unsigned long flags;
if (unlikely(!tscdeadline || !this_tsc_khz))
return;
local_irq_save(flags);
now = apic->lapic_timer.timer.base->get_time();
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
do_div(ns, this_tsc_khz);
}
hrtimer_start(&apic->lapic_timer.timer,
ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
local_irq_restore(flags);
}
}
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
......@@ -782,7 +836,6 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LVT0:
apic_manage_nmi_watchdog(apic, val);
case APIC_LVTT:
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:
......@@ -796,7 +849,22 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
case APIC_LVTT:
if ((apic_get_reg(apic, APIC_LVTT) &
apic->lapic_timer.timer_mode_mask) !=
(val & apic->lapic_timer.timer_mode_mask))
hrtimer_cancel(&apic->lapic_timer.timer);
if (!apic_sw_enabled(apic))
val |= APIC_LVT_MASKED;
val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
apic_set_reg(apic, APIC_LVTT, val);
break;
case APIC_TMICT:
if (apic_lvtt_tscdeadline(apic))
break;
hrtimer_cancel(&apic->lapic_timer.timer);
apic_set_reg(apic, APIC_TMICT, val);
start_apic_timer(apic);
......@@ -804,14 +872,14 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_TDCR:
if (val & 4)
printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val);
apic_debug("KVM_WRITE:TDCR %x\n", val);
apic_set_reg(apic, APIC_TDCR, val);
update_divide_count(apic);
break;
case APIC_ESR:
if (apic_x2apic_mode(apic) && val != 0) {
printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val);
apic_debug("KVM_WRITE:ESR not zero %x\n", val);
ret = 1;
}
break;
......@@ -864,6 +932,15 @@ static int apic_mmio_write(struct kvm_io_device *this,
return 0;
}
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
if (apic)
apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
if (!vcpu->arch.apic)
......@@ -883,6 +960,32 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
*----------------------------------------------------------------------
*/
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
if (!apic)
return 0;
if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
return 0;
return apic->lapic_timer.tscdeadline;
}
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
struct kvm_lapic *apic = vcpu->arch.apic;
if (!apic)
return;
if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
return;
hrtimer_cancel(&apic->lapic_timer.timer);
apic->lapic_timer.tscdeadline = data;
start_apic_timer(apic);
}
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
struct kvm_lapic *apic = vcpu->arch.apic;
......
......@@ -26,6 +26,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
......@@ -41,6 +42,9 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
bool kvm_apic_present(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
......
......@@ -2770,7 +2770,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
pdptr = kvm_pdptr_read_mmu(vcpu, &vcpu->arch.mmu, i);
pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
if (!is_present_gpte(pdptr)) {
vcpu->arch.mmu.pae_root[i] = 0;
continue;
......@@ -3318,6 +3318,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->direct_map = true;
context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
context->get_cr3 = get_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
context->nx = is_nx(vcpu);
......@@ -3376,6 +3377,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3;
vcpu->arch.walk_mmu->get_cr3 = get_cr3;
vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read;
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
return r;
......@@ -3386,6 +3388,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
g_context->get_cr3 = get_cr3;
g_context->get_pdptr = kvm_pdptr_read;
g_context->inject_page_fault = kvm_inject_page_fault;
/*
......
......@@ -121,16 +121,16 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
unsigned long *rmapp;
struct kvm_mmu_page *rev_sp;
gfn_t gfn;
rev_sp = page_header(__pa(sptep));
gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
if (!gfn_to_memslot(kvm, gfn)) {
if (!printk_ratelimit())
if (!__ratelimit(&ratelimit_state))
return;
audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
......@@ -141,7 +141,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
if (!*rmapp) {
if (!printk_ratelimit())
if (!__ratelimit(&ratelimit_state))
return;
audit_printk(kvm, "no rmap for writable spte %llx\n",
*sptep);
......
......@@ -147,7 +147,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
gfn_t table_gfn;
unsigned index, pt_access, uninitialized_var(pte_access);
gpa_t pte_gpa;
bool eperm;
bool eperm, last_gpte;
int offset;
const int write_fault = access & PFERR_WRITE_MASK;
const int user_fault = access & PFERR_USER_MASK;
......@@ -163,7 +163,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
#if PTTYPE == 64
if (walker->level == PT32E_ROOT_LEVEL) {
pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3);
pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
trace_kvm_mmu_paging_element(pte, walker->level);
if (!is_present_gpte(pte))
goto error;
......@@ -221,6 +221,17 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
eperm = true;
#endif
last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
if (last_gpte) {
pte_access = pt_access &
FNAME(gpte_access)(vcpu, pte, true);
/* check if the kernel is fetching from user page */
if (unlikely(pte_access & PT_USER_MASK) &&
kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
if (fetch_fault && !user_fault)
eperm = true;
}
if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
int ret;
trace_kvm_mmu_set_accessed_bit(table_gfn, index,
......@@ -238,18 +249,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
walker->ptes[walker->level - 1] = pte;
if (FNAME(is_last_gpte)(walker, vcpu, mmu, pte)) {
if (last_gpte) {
int lvl = walker->level;
gpa_t real_gpa;
gfn_t gfn;
u32 ac;
/* check if the kernel is fetching from user page */
if (unlikely(pte_access & PT_USER_MASK) &&
kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
if (fetch_fault && !user_fault)
eperm = true;
gfn = gpte_to_gfn_lvl(pte, lvl);
gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
......@@ -295,7 +300,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
walker->ptes[walker->level - 1] = pte;
}
pte_access = pt_access & FNAME(gpte_access)(vcpu, pte, true);
walker->pt_access = pt_access;
walker->pte_access = pte_access;
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
......
......@@ -1084,7 +1084,6 @@ static void init_vmcb(struct vcpu_svm *svm)
if (npt_enabled) {
/* Setup VMCB for Nested Paging */
control->nested_ctl = 1;
clr_intercept(svm, INTERCEPT_TASK_SWITCH);
clr_intercept(svm, INTERCEPT_INVLPG);
clr_exception_intercept(svm, PF_VECTOR);
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
......@@ -1844,6 +1843,20 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
return svm->nested.nested_cr3;
}
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 cr3 = svm->nested.nested_cr3;
u64 pdpte;
int ret;
ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
offset_in_page(cr3) + index * 8, 8);
if (ret)
return 0;
return pdpte;
}
static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
unsigned long root)
{
......@@ -1875,6 +1888,7 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
vcpu->arch.mmu.shadow_root_level = get_npt_level();
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
......@@ -2182,7 +2196,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb->control.exit_info_1,
vmcb->control.exit_info_2,
vmcb->control.exit_int_info,
vmcb->control.exit_int_info_err);
vmcb->control.exit_int_info_err,
KVM_ISA_SVM);
nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
if (!nested_vmcb)
......@@ -2894,15 +2909,20 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
{
struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
return vmcb->control.tsc_offset +
svm_scale_tsc(vcpu, native_read_tsc());
}
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
{
struct vcpu_svm *svm = to_svm(vcpu);
switch (ecx) {
case MSR_IA32_TSC: {
struct vmcb *vmcb = get_host_vmcb(svm);
*data = vmcb->control.tsc_offset +
*data = svm->vmcb->control.tsc_offset +
svm_scale_tsc(vcpu, native_read_tsc());
break;
......@@ -3314,8 +3334,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
u32 exit_code = svm->vmcb->control.exit_code;
trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
......@@ -3335,7 +3353,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
svm->vmcb->control.exit_info_1,
svm->vmcb->control.exit_info_2,
svm->vmcb->control.exit_int_info,
svm->vmcb->control.exit_int_info_err);
svm->vmcb->control.exit_int_info_err,
KVM_ISA_SVM);
vmexit = nested_svm_exit_special(svm);
......@@ -3768,6 +3787,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_handle_nmi(&svm->vcpu);
......@@ -3897,60 +3918,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
}
}
static const struct trace_print_flags svm_exit_reasons_str[] = {
{ SVM_EXIT_READ_CR0, "read_cr0" },
{ SVM_EXIT_READ_CR3, "read_cr3" },
{ SVM_EXIT_READ_CR4, "read_cr4" },
{ SVM_EXIT_READ_CR8, "read_cr8" },
{ SVM_EXIT_WRITE_CR0, "write_cr0" },
{ SVM_EXIT_WRITE_CR3, "write_cr3" },
{ SVM_EXIT_WRITE_CR4, "write_cr4" },
{ SVM_EXIT_WRITE_CR8, "write_cr8" },
{ SVM_EXIT_READ_DR0, "read_dr0" },
{ SVM_EXIT_READ_DR1, "read_dr1" },
{ SVM_EXIT_READ_DR2, "read_dr2" },
{ SVM_EXIT_READ_DR3, "read_dr3" },
{ SVM_EXIT_WRITE_DR0, "write_dr0" },
{ SVM_EXIT_WRITE_DR1, "write_dr1" },
{ SVM_EXIT_WRITE_DR2, "write_dr2" },
{ SVM_EXIT_WRITE_DR3, "write_dr3" },
{ SVM_EXIT_WRITE_DR5, "write_dr5" },
{ SVM_EXIT_WRITE_DR7, "write_dr7" },
{ SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" },
{ SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" },
{ SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" },
{ SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" },
{ SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" },
{ SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" },
{ SVM_EXIT_INTR, "interrupt" },
{ SVM_EXIT_NMI, "nmi" },
{ SVM_EXIT_SMI, "smi" },
{ SVM_EXIT_INIT, "init" },
{ SVM_EXIT_VINTR, "vintr" },
{ SVM_EXIT_CPUID, "cpuid" },
{ SVM_EXIT_INVD, "invd" },
{ SVM_EXIT_HLT, "hlt" },
{ SVM_EXIT_INVLPG, "invlpg" },
{ SVM_EXIT_INVLPGA, "invlpga" },
{ SVM_EXIT_IOIO, "io" },
{ SVM_EXIT_MSR, "msr" },
{ SVM_EXIT_TASK_SWITCH, "task_switch" },
{ SVM_EXIT_SHUTDOWN, "shutdown" },
{ SVM_EXIT_VMRUN, "vmrun" },
{ SVM_EXIT_VMMCALL, "hypercall" },
{ SVM_EXIT_VMLOAD, "vmload" },
{ SVM_EXIT_VMSAVE, "vmsave" },
{ SVM_EXIT_STGI, "stgi" },
{ SVM_EXIT_CLGI, "clgi" },
{ SVM_EXIT_SKINIT, "skinit" },
{ SVM_EXIT_WBINVD, "wbinvd" },
{ SVM_EXIT_MONITOR, "monitor" },
{ SVM_EXIT_MWAIT, "mwait" },
{ SVM_EXIT_XSETBV, "xsetbv" },
{ SVM_EXIT_NPF, "npf" },
{ -1, NULL }
};
static int svm_get_lpage_level(void)
{
return PT_PDPE_LEVEL;
......@@ -4223,7 +4190,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.get_mt_mask = svm_get_mt_mask,
.get_exit_info = svm_get_exit_info,
.exit_reasons_str = svm_exit_reasons_str,
.get_lpage_level = svm_get_lpage_level,
......@@ -4239,6 +4205,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.write_tsc_offset = svm_write_tsc_offset,
.adjust_tsc_offset = svm_adjust_tsc_offset,
.compute_tsc_offset = svm_compute_tsc_offset,
.read_l1_tsc = svm_read_l1_tsc,
.set_tdp_cr3 = set_tdp_cr3,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -371,6 +371,7 @@ struct kvm_s390_psw {
#define KVM_S390_INT_VIRTIO 0xffff2603u
#define KVM_S390_INT_SERVICE 0xffff2401u
#define KVM_S390_INT_EMERGENCY 0xffff1201u
#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
struct kvm_s390_interrupt {
__u32 type;
......@@ -463,7 +464,7 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_VAPIC 6
#define KVM_CAP_EXT_CPUID 7
#define KVM_CAP_CLOCKSOURCE 8
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_VCPUS 9 /* returns recommended max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
#define KVM_CAP_PIT 11
#define KVM_CAP_NOP_IO_DELAY 12
......@@ -553,6 +554,9 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_SPAPR_TCE 63
#define KVM_CAP_PPC_SMT 64
#define KVM_CAP_PPC_RMA 65
#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */
#define KVM_CAP_PPC_HIOR 67
#define KVM_CAP_PPC_PAPR 68
#define KVM_CAP_S390_GMAP 71
#ifdef KVM_CAP_IRQ_ROUTING
......
......@@ -18,6 +18,7 @@
#include <linux/msi.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/ratelimit.h>
#include <asm/signal.h>
#include <linux/kvm.h>
......@@ -48,6 +49,7 @@
#define KVM_REQ_EVENT 11
#define KVM_REQ_APF_HALT 12
#define KVM_REQ_STEAL_UPDATE 13
#define KVM_REQ_NMI 14
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
......@@ -55,16 +57,16 @@ struct kvm;
struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
/*
* It would be nice to use something smarter than a linear search, TBD...
* Thankfully we dont expect many devices to register (famous last words :),
* so until then it will suffice. At least its abstracted so we can change
* in one place.
*/
struct kvm_io_range {
gpa_t addr;
int len;
struct kvm_io_device *dev;
};
struct kvm_io_bus {
int dev_count;
#define NR_IOBUS_DEVS 200
struct kvm_io_device *devs[NR_IOBUS_DEVS];
#define NR_IOBUS_DEVS 300
struct kvm_io_range range[NR_IOBUS_DEVS];
};
enum kvm_bus {
......@@ -77,8 +79,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val);
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
void *val);
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev);
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, struct kvm_io_device *dev);
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev);
......@@ -256,8 +258,9 @@ struct kvm {
struct kvm_arch arch;
atomic_t users_count;
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
struct kvm_coalesced_mmio_dev *coalesced_mmio_dev;
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
spinlock_t ring_lock;
struct list_head coalesced_zones;
#endif
struct mutex irq_lock;
......@@ -281,11 +284,8 @@ struct kvm {
/* The guest did something we don't support. */
#define pr_unimpl(vcpu, fmt, ...) \
do { \
if (printk_ratelimit()) \
printk(KERN_ERR "kvm: %i: cpu%i " fmt, \
current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
} while (0)
pr_err_ratelimited("kvm: %i: cpu%i " fmt, \
current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__)
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
......
This diff is collapsed.
This diff is collapsed.
......@@ -12,14 +12,13 @@
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_ZONE_MAX 100
#include <linux/list.h>
struct kvm_coalesced_mmio_dev {
struct list_head list;
struct kvm_io_device dev;
struct kvm *kvm;
spinlock_t lock;
int nb_zones;
struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
struct kvm_coalesced_mmio_zone zone;
};
int kvm_coalesced_mmio_init(struct kvm *kvm);
......
......@@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
kvm_iodevice_init(&p->dev, &ioeventfd_ops);
ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
&p->dev);
if (ret < 0)
goto unlock_fail;
......
......@@ -394,7 +394,8 @@ int kvm_ioapic_init(struct kvm *kvm)
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
ioapic->kvm = kvm;
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
IOAPIC_MEM_LENGTH, &ioapic->dev);
mutex_unlock(&kvm->slots_lock);
if (ret < 0) {
kvm->arch.vioapic = NULL;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment