Commit b5c9ff73 authored by Takuya Yoshikawa, committed by Avi Kivity

KVM: x86 emulator: Avoid clearing the whole decode_cache

While tracing the emulator, we noticed that init_emulate_ctxt()
sometimes took a bit longer than we expected.

This patch mitigates the problem to some degree.

By looking into the function, we soon notice that it clears the whole
decode_cache whose size is about 2.5K bytes now.  Furthermore, most of
the bytes are taken for the two read_cache arrays, which are used only
by a few instructions.

Since we never assume that the cache arrays have been cleared when we
store actual data, we do not need to clear the arrays, which eliminates
2K bytes of clearing.  In addition, we can avoid clearing the
fetch_cache and regs arrays.

This patch changes the initialization not to clear the arrays.

On our 64-bit host, init_emulate_ctxt() becomes 0.3 to 0.5us faster with
this patch applied.
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Cc: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent adf52235
...@@ -246,8 +246,6 @@ struct decode_cache { ...@@ -246,8 +246,6 @@ struct decode_cache {
unsigned int d; unsigned int d;
int (*execute)(struct x86_emulate_ctxt *ctxt); int (*execute)(struct x86_emulate_ctxt *ctxt);
int (*check_perm)(struct x86_emulate_ctxt *ctxt); int (*check_perm)(struct x86_emulate_ctxt *ctxt);
unsigned long regs[NR_VCPU_REGS];
unsigned long eip;
/* modrm */ /* modrm */
u8 modrm; u8 modrm;
u8 modrm_mod; u8 modrm_mod;
...@@ -255,6 +253,9 @@ struct decode_cache { ...@@ -255,6 +253,9 @@ struct decode_cache {
u8 modrm_rm; u8 modrm_rm;
u8 modrm_seg; u8 modrm_seg;
bool rip_relative; bool rip_relative;
unsigned long eip;
/* Fields above regs are cleared together. */
unsigned long regs[NR_VCPU_REGS];
struct fetch_cache fetch; struct fetch_cache fetch;
struct read_cache io_read; struct read_cache io_read;
struct read_cache mem_read; struct read_cache mem_read;
......
...@@ -4506,6 +4506,20 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) ...@@ -4506,6 +4506,20 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
kvm_queue_exception(vcpu, ctxt->exception.vector); kvm_queue_exception(vcpu, ctxt->exception.vector);
} }
/*
 * init_decode_cache - reinitialize a decode cache without wiping all of it.
 * @c:    decode cache to reset
 * @regs: source register values; sizeof(c->regs) bytes are copied from here
 *
 * Only the members laid out before regs[] in struct decode_cache are
 * zeroed.  regs[] is overwritten with the caller's values.  The fetch,
 * io_read and mem_read members are not cleared: just their start/pos/end
 * fields are set back to 0.
 */
static void init_decode_cache(struct decode_cache *c,
			      const unsigned long *regs)
{
	/* Number of leading bytes of *c that precede the regs[] member. */
	const size_t head_len = offsetof(struct decode_cache, regs);

	/* Rewind the cache markers; nothing else in these members is touched. */
	c->mem_read.end = 0;
	c->mem_read.pos = 0;
	c->io_read.end = 0;
	c->io_read.pos = 0;
	c->fetch.end = 0;
	c->fetch.start = 0;

	/* Zero the leading fields in one go, then load the register file. */
	memset(c, 0, head_len);
	memcpy(c->regs, regs, sizeof(c->regs));
}
static void init_emulate_ctxt(struct kvm_vcpu *vcpu) static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
{ {
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
...@@ -4531,8 +4545,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ...@@ -4531,8 +4545,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
X86EMUL_MODE_PROT16; X86EMUL_MODE_PROT16;
ctxt->guest_mode = is_guest_mode(vcpu); ctxt->guest_mode = is_guest_mode(vcpu);
memset(c, 0, sizeof(struct decode_cache)); init_decode_cache(c, vcpu->arch.regs);
memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false; vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment