Commit f78146b0 authored by Avi Kivity's avatar Avi Kivity Committed by Marcelo Tosatti

KVM: Fix page-crossing MMIO

MMIO that are split across a page boundary are currently broken - the
code does not expect to be aborted by the exit to userspace for the
first MMIO fragment.

This patch fixes the problem by generalizing the current code for handling
16-byte MMIOs to handle a number of "fragments", and changes the MMIO
code to create those fragments.
Signed-off-by: default avatarAvi Kivity <avi@redhat.com>
Signed-off-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>
parent eac05567
...@@ -449,6 +449,8 @@ struct kvm_vcpu_arch { ...@@ -449,6 +449,8 @@ struct kvm_vcpu_arch {
char log_buf[VMM_LOG_LEN]; char log_buf[VMM_LOG_LEN];
union context host; union context host;
union context guest; union context guest;
char mmio_data[8];
}; };
struct kvm_vm_stat { struct kvm_vm_stat {
......
...@@ -232,12 +232,12 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -232,12 +232,12 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
goto mmio; goto mmio;
vcpu->mmio_needed = 1; vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
vcpu->mmio_size = kvm_run->mmio.len = p->size; vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
if (vcpu->mmio_is_write) if (vcpu->mmio_is_write)
memcpy(vcpu->mmio_data, &p->data, p->size); memcpy(vcpu->arch.mmio_data, &p->data, p->size);
memcpy(kvm_run->mmio.data, &p->data, p->size); memcpy(kvm_run->mmio.data, &p->data, p->size);
kvm_run->exit_reason = KVM_EXIT_MMIO; kvm_run->exit_reason = KVM_EXIT_MMIO;
return 0; return 0;
...@@ -719,7 +719,7 @@ static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) ...@@ -719,7 +719,7 @@ static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
if (!vcpu->mmio_is_write) if (!vcpu->mmio_is_write)
memcpy(&p->data, vcpu->mmio_data, 8); memcpy(&p->data, vcpu->arch.mmio_data, 8);
p->state = STATE_IORESP_READY; p->state = STATE_IORESP_READY;
} }
...@@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
} }
if (vcpu->mmio_needed) { if (vcpu->mmio_needed) {
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
kvm_set_mmio_data(vcpu); kvm_set_mmio_data(vcpu);
vcpu->mmio_read_completed = 1; vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0; vcpu->mmio_needed = 0;
......
...@@ -3718,9 +3718,8 @@ struct read_write_emulator_ops { ...@@ -3718,9 +3718,8 @@ struct read_write_emulator_ops {
static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{ {
if (vcpu->mmio_read_completed) { if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
vcpu->mmio_phys_addr, *(u64 *)val); vcpu->mmio_fragments[0].gpa, *(u64 *)val);
vcpu->mmio_read_completed = 0; vcpu->mmio_read_completed = 0;
return 1; return 1;
} }
...@@ -3756,8 +3755,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -3756,8 +3755,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes) void *val, int bytes)
{ {
memcpy(vcpu->mmio_data, val, bytes); struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
memcpy(vcpu->run->mmio.data, frag->data, frag->len);
return X86EMUL_CONTINUE; return X86EMUL_CONTINUE;
} }
...@@ -3784,10 +3784,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, ...@@ -3784,10 +3784,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
gpa_t gpa; gpa_t gpa;
int handled, ret; int handled, ret;
bool write = ops->write; bool write = ops->write;
struct kvm_mmio_fragment *frag;
if (ops->read_write_prepare &&
ops->read_write_prepare(vcpu, val, bytes))
return X86EMUL_CONTINUE;
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
...@@ -3813,15 +3810,19 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, ...@@ -3813,15 +3810,19 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
bytes -= handled; bytes -= handled;
val += handled; val += handled;
vcpu->mmio_needed = 1; while (bytes) {
vcpu->run->exit_reason = KVM_EXIT_MMIO; unsigned now = min(bytes, 8U);
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
vcpu->mmio_index = 0;
return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
frag->gpa = gpa;
frag->data = val;
frag->len = now;
gpa += now;
val += now;
bytes -= now;
}
return X86EMUL_CONTINUE;
} }
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
...@@ -3830,10 +3831,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, ...@@ -3830,10 +3831,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
struct read_write_emulator_ops *ops) struct read_write_emulator_ops *ops)
{ {
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
int rc;
if (ops->read_write_prepare &&
ops->read_write_prepare(vcpu, val, bytes))
return X86EMUL_CONTINUE;
vcpu->mmio_nr_fragments = 0;
/* Crossing a page boundary? */ /* Crossing a page boundary? */
if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
int rc, now; int now;
now = -addr & ~PAGE_MASK; now = -addr & ~PAGE_MASK;
rc = emulator_read_write_onepage(addr, val, now, exception, rc = emulator_read_write_onepage(addr, val, now, exception,
...@@ -3846,8 +3855,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, ...@@ -3846,8 +3855,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
bytes -= now; bytes -= now;
} }
return emulator_read_write_onepage(addr, val, bytes, exception, rc = emulator_read_write_onepage(addr, val, bytes, exception,
vcpu, ops); vcpu, ops);
if (rc != X86EMUL_CONTINUE)
return rc;
if (!vcpu->mmio_nr_fragments)
return rc;
gpa = vcpu->mmio_fragments[0].gpa;
vcpu->mmio_needed = 1;
vcpu->mmio_cur_fragment = 0;
vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = gpa;
return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
} }
static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
...@@ -5446,33 +5472,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) ...@@ -5446,33 +5472,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return r; return r;
} }
/*
* Implements the following, as a state machine:
*
* read:
* for each fragment
* write gpa, len
* exit
* copy data
* execute insn
*
* write:
* for each fragment
* write gpa, len
* copy data
* exit
*/
static int complete_mmio(struct kvm_vcpu *vcpu) static int complete_mmio(struct kvm_vcpu *vcpu)
{ {
struct kvm_run *run = vcpu->run; struct kvm_run *run = vcpu->run;
struct kvm_mmio_fragment *frag;
int r; int r;
if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
return 1; return 1;
if (vcpu->mmio_needed) { if (vcpu->mmio_needed) {
vcpu->mmio_needed = 0; /* Complete previous fragment */
frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
if (!vcpu->mmio_is_write) if (!vcpu->mmio_is_write)
memcpy(vcpu->mmio_data + vcpu->mmio_index, memcpy(frag->data, run->mmio.data, frag->len);
run->mmio.data, 8); if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
vcpu->mmio_index += 8; vcpu->mmio_needed = 0;
if (vcpu->mmio_index < vcpu->mmio_size) { if (vcpu->mmio_is_write)
run->exit_reason = KVM_EXIT_MMIO; return 1;
run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; vcpu->mmio_read_completed = 1;
memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); goto done;
run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
run->mmio.is_write = vcpu->mmio_is_write;
vcpu->mmio_needed = 1;
return 0;
} }
/* Initiate next fragment */
++frag;
run->exit_reason = KVM_EXIT_MMIO;
run->mmio.phys_addr = frag->gpa;
if (vcpu->mmio_is_write) if (vcpu->mmio_is_write)
return 1; memcpy(run->mmio.data, frag->data, frag->len);
vcpu->mmio_read_completed = 1; run->mmio.len = frag->len;
run->mmio.is_write = vcpu->mmio_is_write;
return 0;
} }
done:
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
......
...@@ -34,6 +34,20 @@ ...@@ -34,6 +34,20 @@
#define KVM_MMIO_SIZE 8 #define KVM_MMIO_SIZE 8
#endif #endif
/*
* If we support unaligned MMIO, at most one fragment will be split into two:
*/
#ifdef KVM_UNALIGNED_MMIO
# define KVM_EXTRA_MMIO_FRAGMENTS 1
#else
# define KVM_EXTRA_MMIO_FRAGMENTS 0
#endif
#define KVM_USER_MMIO_SIZE 8
#define KVM_MAX_MMIO_FRAGMENTS \
(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
/* /*
* vcpu->requests bit members * vcpu->requests bit members
*/ */
...@@ -117,6 +131,16 @@ enum { ...@@ -117,6 +131,16 @@ enum {
EXITING_GUEST_MODE EXITING_GUEST_MODE
}; };
/*
* Sometimes a large or cross-page mmio needs to be broken up into separate
* exits for userspace servicing.
*/
struct kvm_mmio_fragment {
gpa_t gpa;
void *data;
unsigned len;
};
struct kvm_vcpu { struct kvm_vcpu {
struct kvm *kvm; struct kvm *kvm;
#ifdef CONFIG_PREEMPT_NOTIFIERS #ifdef CONFIG_PREEMPT_NOTIFIERS
...@@ -144,10 +168,9 @@ struct kvm_vcpu { ...@@ -144,10 +168,9 @@ struct kvm_vcpu {
int mmio_needed; int mmio_needed;
int mmio_read_completed; int mmio_read_completed;
int mmio_is_write; int mmio_is_write;
int mmio_size; int mmio_cur_fragment;
int mmio_index; int mmio_nr_fragments;
unsigned char mmio_data[KVM_MMIO_SIZE]; struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
gpa_t mmio_phys_addr;
#endif #endif
#ifdef CONFIG_KVM_ASYNC_PF #ifdef CONFIG_KVM_ASYNC_PF
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment