Commit e20bbd3d authored by Aravinda Prasad, committed by Paul Mackerras

KVM: PPC: Book3S HV: Exit guest upon MCE when FWNMI capability is enabled

Enhance KVM to cause a guest exit with the KVM_EXIT_NMI
exit reason upon a machine check exception (MCE) in
the guest address space if the KVM_CAP_PPC_FWNMI
capability is enabled (instead of delivering a 0x200
interrupt to the guest). This enables QEMU to build an error
log and deliver the machine check exception to the guest via
the guest-registered machine check handler.

This approach simplifies the delivery of machine
check exception to guest OS compared to the earlier
approach of KVM directly invoking 0x200 guest interrupt
vector.

This design/approach is based on the feedback for the
QEMU patches to handle machine check exception. Details
of earlier approach of handling machine check exception
in QEMU and related discussions can be found at:

https://lists.nongnu.org/archive/html/qemu-devel/2014-11/msg00813.html

Note:

This patch now directly invokes machine_check_print_event_info()
from kvmppc_handle_exit_hv() to print the event to host console
at the time of guest exit before the exception is passed on to the
guest. Hence, the host-side handling which was performed earlier
via machine_check_fwnmi is removed.

There are two reasons for this approach: (i) it is not
possible to distinguish whether the exception occurred in
the guest or the host from the pt_regs passed to
machine_check_exception(). Hence machine_check_exception()
calls panic, instead of passing on the exception to
the guest, if the machine check exception is not
recoverable. (ii) The approach introduced in this
patch gives the host kernel an opportunity to perform
actions in virtual mode before passing on the exception
to the guest. This approach does not require complex
tweaks to machine_check_fwnmi and friends.

Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
parent 8aa586c6
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <asm/page.h> #include <asm/page.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/hvcall.h> #include <asm/hvcall.h>
#include <asm/mce.h>
#define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS #define KVM_MAX_VCORES NR_CPUS
...@@ -727,6 +728,7 @@ struct kvm_vcpu_arch { ...@@ -727,6 +728,7 @@ struct kvm_vcpu_arch {
int prev_cpu; int prev_cpu;
bool timer_running; bool timer_running;
wait_queue_head_t cpu_run; wait_queue_head_t cpu_run;
struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
struct kvm_vcpu_arch_shared *shared; struct kvm_vcpu_arch_shared *shared;
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
......
...@@ -60,6 +60,12 @@ struct kvm_regs { ...@@ -60,6 +60,12 @@ struct kvm_regs {
#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ #define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
/* flags for kvm_run.flags */
#define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0)
#define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0)
#define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0)
#define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0)
/* /*
* Feature bits indicate which sections of the sregs struct are valid, * Feature bits indicate which sections of the sregs struct are valid,
* both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
......
...@@ -1088,15 +1088,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -1088,15 +1088,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST; r = RESUME_GUEST;
break; break;
case BOOK3S_INTERRUPT_MACHINE_CHECK: case BOOK3S_INTERRUPT_MACHINE_CHECK:
/* /* Exit to guest with KVM_EXIT_NMI as exit reason */
* Deliver a machine check interrupt to the guest. run->exit_reason = KVM_EXIT_NMI;
* We have to do this, even if the host has handled the run->hw.hardware_exit_reason = vcpu->arch.trap;
* machine check, because machine checks use SRR0/1 and /* Clear out the old NMI status from run->flags */
* the interrupt might have trashed guest state in them. run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
*/ /* Now set the NMI status */
kvmppc_book3s_queue_irqprio(vcpu, if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
BOOK3S_INTERRUPT_MACHINE_CHECK); run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
r = RESUME_GUEST; else
run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
r = RESUME_HOST;
/* Print the MCE event to host console. */
machine_check_print_event_info(&vcpu->arch.mce_evt, false);
break; break;
case BOOK3S_INTERRUPT_PROGRAM: case BOOK3S_INTERRUPT_PROGRAM:
{ {
......
...@@ -130,11 +130,27 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) ...@@ -130,11 +130,27 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
out: out:
/* /*
* For guest that supports FWNMI capability, hook the MCE event into
* vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
* exit reason. On our way to exit we will pull this event from vcpu
* structure and print it from thread 0 of the core/subcore.
*
* For guest that does not support FWNMI capability (old QEMU):
* We are now going to enter guest either through machine check * We are now going to enter guest either through machine check
* interrupt (for unhandled errors) or will continue from * interrupt (for unhandled errors) or will continue from
* current HSRR0 (for handled errors) in guest. Hence * current HSRR0 (for handled errors) in guest. Hence
* queue up the event so that we can log it from host console later. * queue up the event so that we can log it from host console later.
*/ */
if (vcpu->kvm->arch.fwnmi_enabled) {
/*
* Hook up the mce event on to vcpu structure.
* First clear the old event.
*/
memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
vcpu->arch.mce_evt = mce_evt;
}
} else
machine_check_queue_event(); machine_check_queue_event();
return handled; return handled;
......
...@@ -153,15 +153,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -153,15 +153,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
stb r0, HSTATE_HWTHREAD_REQ(r13) stb r0, HSTATE_HWTHREAD_REQ(r13)
/* /*
* For external and machine check interrupts, we need * For external interrupts we need to call the Linux
* to call the Linux handler to process the interrupt. * handler to process the interrupt. We do that by jumping
* We do that by jumping to absolute address 0x500 for * to absolute address 0x500 for external interrupts.
* external interrupts, or the machine_check_fwnmi label * The [h]rfid at the end of the handler will return to
* for machine checks (since firmware might have patched * the book3s_hv_interrupts.S code. For other interrupts
* the vector area at 0x200). The [h]rfid at the end of the * we do the rfid to get back to the book3s_hv_interrupts.S
* handler will return to the book3s_hv_interrupts.S code. * code here.
* For other interrupts we do the rfid to get back
* to the book3s_hv_interrupts.S code here.
*/ */
ld r8, 112+PPC_LR_STKOFF(r1) ld r8, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112 addi r1, r1, 112
...@@ -176,7 +174,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -176,7 +174,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
andi. r0, r0, MSR_IR /* in real mode? */ andi. r0, r0, MSR_IR /* in real mode? */
bne .Lvirt_return bne .Lvirt_return
cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq 11f beq 11f
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
...@@ -191,7 +188,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -191,7 +188,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtmsrd r6, 1 /* Clear RI in MSR */ mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8 mtsrr0 r8
mtsrr1 r7 mtsrr1 r7
beq cr1, 13f /* machine check */ /*
* BOOK3S_INTERRUPT_MACHINE_CHECK is handled at the
* time of guest exit
*/
RFI RFI
/* On POWER7, we have external interrupts set to use HSRR0/1 */ /* On POWER7, we have external interrupts set to use HSRR0/1 */
...@@ -199,8 +199,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -199,8 +199,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_HSRR1, r7 mtspr SPRN_HSRR1, r7
ba 0x500 ba 0x500
13: b machine_check_fwnmi
14: mtspr SPRN_HSRR0, r8 14: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7 mtspr SPRN_HSRR1, r7
b hmi_exception_after_realmode b hmi_exception_after_realmode
...@@ -2640,22 +2638,32 @@ machine_check_realmode: ...@@ -2640,22 +2638,32 @@ machine_check_realmode:
ld r9, HSTATE_KVM_VCPU(r13) ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
/* /*
* Deliver unhandled/fatal (e.g. UE) MCE errors to guest through * For the guest that is FWNMI capable, deliver all the MCE errors
* machine check interrupt (set HSRR0 to 0x200). And for handled * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
* errors (no-fatal), just go back to guest execution with current * reason. This new approach injects machine check errors in guest
* HSRR0 instead of exiting guest. This new approach will inject * address space to guest with additional information in the form
* machine check to guest for fatal error causing guest to crash. * of RTAS event, thus enabling guest kernel to suitably handle
* * such errors.
* The old code used to return to host for unhandled errors which
* was causing guest to hang with soft lockups inside guest and
* makes it difficult to recover guest instance.
* *
* For the guest that is not FWNMI capable (old QEMU) fallback
* to old behaviour for backward compatibility:
* Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
* through machine check interrupt (set HSRR0 to 0x200).
* For handled errors (no-fatal), just go back to guest execution
* with current HSRR0.
* if we receive machine check with MSR(RI=0) then deliver it to * if we receive machine check with MSR(RI=0) then deliver it to
* guest as machine check causing guest to crash. * guest as machine check causing guest to crash.
*/ */
ld r11, VCPU_MSR(r9) ld r11, VCPU_MSR(r9)
rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */ rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
bne mc_cont /* if so, exit to host */ bne mc_cont /* if so, exit to host */
/* Check if guest is capable of handling NMI exit */
ld r10, VCPU_KVM(r9)
lbz r10, KVM_FWNMI(r10)
cmpdi r10, 1 /* FWNMI capable? */
beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
/* if not, fall through for backward compatibility. */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */ andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */ beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9) ld r10, VCPU_PC(r9)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment