Commit 3eebd233 authored by Rusty Russell

lguest: handle traps on the "interrupt suppressed" iret instruction.

Lguest's "iret" is non-atomic, as it needs to restore the interrupt
state before the real iret (the guest can't actually suppress
interrupts).  For this reason, the host discards an interrupt if it
occurs in this (1-instruction) window.

We can do better, by emulating the iret execution, then immediately
setting up the interrupt handler.  In fact, we don't need to do much,
as emulating the iret and setting up the stack for the interrupt handler
basically cancel each other out.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
parent 01266539
...@@ -56,21 +56,16 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) ...@@ -56,21 +56,16 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
} }
/*H:210 /*H:210
* The set_guest_interrupt() routine actually delivers the interrupt or * The push_guest_interrupt_stack() routine saves Guest state on the stack for
* trap. The mechanics of delivering traps and interrupts to the Guest are the * an interrupt or trap. The mechanics of delivering traps and interrupts to
* same, except some traps have an "error code" which gets pushed onto the * the Guest are the same, except some traps have an "error code" which gets
* stack as well: the caller tells us if this is one. * pushed onto the stack as well: the caller tells us if this is one.
*
* "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
* interrupt or trap. It's split into two parts for traditional reasons: gcc
* on i386 used to be frightened by 64 bit numbers.
* *
* We set up the stack just like the CPU does for a real interrupt, so it's * We set up the stack just like the CPU does for a real interrupt, so it's
* identical for the Guest (and the standard "iret" instruction will undo * identical for the Guest (and the standard "iret" instruction will undo
* it). * it).
*/ */
static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, static void push_guest_interrupt_stack(struct lg_cpu *cpu, bool has_err)
bool has_err)
{ {
unsigned long gstack, origstack; unsigned long gstack, origstack;
u32 eflags, ss, irq_enable; u32 eflags, ss, irq_enable;
...@@ -130,12 +125,28 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, ...@@ -130,12 +125,28 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
if (has_err) if (has_err)
push_guest_stack(cpu, &gstack, cpu->regs->errcode); push_guest_stack(cpu, &gstack, cpu->regs->errcode);
/* /* Adjust the stack pointer and stack segment. */
* Now we've pushed all the old state, we change the stack, the code
* segment and the address to execute.
*/
cpu->regs->ss = ss; cpu->regs->ss = ss;
cpu->regs->esp = virtstack + (gstack - origstack); cpu->regs->esp = virtstack + (gstack - origstack);
}
/*
* This actually makes the Guest start executing the given interrupt/trap
* handler.
*
* "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
* interrupt or trap. It's split into two parts for traditional reasons: gcc
* on i386 used to be frightened by 64 bit numbers.
*/
static void guest_run_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi)
{
/* If we're already in the kernel, we don't change stacks. */
if ((cpu->regs->ss&0x3) != GUEST_PL)
cpu->regs->ss = cpu->esp1;
/*
* Set the code segment and the address to execute.
*/
cpu->regs->cs = (__KERNEL_CS|GUEST_PL); cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
cpu->regs->eip = idt_address(lo, hi); cpu->regs->eip = idt_address(lo, hi);
...@@ -158,6 +169,24 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, ...@@ -158,6 +169,24 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
kill_guest(cpu, "Disabling interrupts"); kill_guest(cpu, "Disabling interrupts");
} }
/* This restores the eflags word which was pushed on the stack by a trap */
static void restore_eflags(struct lg_cpu *cpu)
{
/* This is the physical address of the stack. */
unsigned long stack_pa = guest_pa(cpu, cpu->regs->esp);
/*
* Stack looks like this:
* Address Contents
* esp EIP
* esp + 4 CS
* esp + 8 EFLAGS
*/
cpu->regs->eflags = lgread(cpu, stack_pa + 8, u32);
cpu->regs->eflags &=
~(X86_EFLAGS_TF|X86_EFLAGS_VM|X86_EFLAGS_RF|X86_EFLAGS_NT);
}
/*H:205 /*H:205
* Virtual Interrupts. * Virtual Interrupts.
* *
...@@ -200,13 +229,6 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) ...@@ -200,13 +229,6 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
BUG_ON(irq >= LGUEST_IRQS); BUG_ON(irq >= LGUEST_IRQS);
/*
* They may be in the middle of an iret, where they asked us never to
* deliver interrupts.
*/
if (cpu->regs->eip == cpu->lg->noirq_iret)
return;
/* If they're halted, interrupts restart them. */ /* If they're halted, interrupts restart them. */
if (cpu->halted) { if (cpu->halted) {
/* Re-enable interrupts. */ /* Re-enable interrupts. */
...@@ -236,12 +258,34 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) ...@@ -236,12 +258,34 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
if (idt_present(idt->a, idt->b)) { if (idt_present(idt->a, idt->b)) {
/* OK, mark it no longer pending and deliver it. */ /* OK, mark it no longer pending and deliver it. */
clear_bit(irq, cpu->irqs_pending); clear_bit(irq, cpu->irqs_pending);
/* /*
* set_guest_interrupt() takes the interrupt descriptor and a * They may be about to iret, where they asked us never to
* flag to say whether this interrupt pushes an error code onto * deliver interrupts. In this case, we can emulate that iret
* the stack as well: virtual interrupts never do. * then immediately deliver the interrupt. This is basically
* a noop: the iret would pop the interrupt frame and restore
* eflags, and then we'd set it up again. So just restore the
* eflags word and jump straight to the handler in this case.
*
* Denys Vlasenko points out that this isn't quite right: if
* the iret was returning to userspace, then that interrupt
* would reset the stack pointer (which the Guest told us
* about via LHCALL_SET_STACK). But unless the Guest is being
* *really* weird, that will be the same as the current stack
* anyway.
*/ */
set_guest_interrupt(cpu, idt->a, idt->b, false); if (cpu->regs->eip == cpu->lg->noirq_iret) {
restore_eflags(cpu);
} else {
/*
* push_guest_interrupt_stack() takes a flag to say whether
* this interrupt pushes an error code onto the stack
* as well: virtual interrupts never do.
*/
push_guest_interrupt_stack(cpu, false);
}
/* Actually make Guest cpu jump to handler. */
guest_run_interrupt(cpu, idt->a, idt->b);
} }
/* /*
...@@ -352,8 +396,9 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) ...@@ -352,8 +396,9 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
*/ */
if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b))
return false; return false;
set_guest_interrupt(cpu, cpu->arch.idt[num].a, push_guest_interrupt_stack(cpu, has_err(num));
cpu->arch.idt[num].b, has_err(num)); guest_run_interrupt(cpu, cpu->arch.idt[num].a,
cpu->arch.idt[num].b);
return true; return true;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment