Commit b3356bf0 authored by Gleb Natapov, committed by Avi Kivity

KVM: emulator: optimize "rep ins" handling

Optimize "rep ins" by allowing the emulator to write back more than one
datum at a time. Introduce a new operand type, OP_MEM_STR, which tells
writeback() that dst contains a pointer to an array that should be written
back, as opposed to just one data element.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent f3bd64c6
...@@ -213,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t; ...@@ -213,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
/* Type, address-of, and value of an instruction's operand. */ /* Type, address-of, and value of an instruction's operand. */
struct operand { struct operand {
enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
unsigned int bytes; unsigned int bytes;
unsigned int count;
union { union {
unsigned long orig_val; unsigned long orig_val;
u64 orig_val64; u64 orig_val64;
...@@ -234,6 +235,7 @@ struct operand { ...@@ -234,6 +235,7 @@ struct operand {
char valptr[sizeof(unsigned long) + 2]; char valptr[sizeof(unsigned long) + 2];
sse128_t vec_val; sse128_t vec_val;
u64 mm_val; u64 mm_val;
void *data;
}; };
}; };
......
...@@ -1301,8 +1301,15 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, ...@@ -1301,8 +1301,15 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
rc->end = n * size; rc->end = n * size;
} }
memcpy(dest, rc->data + rc->pos, size); if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
rc->pos += size; ctxt->dst.data = rc->data + rc->pos;
ctxt->dst.type = OP_MEM_STR;
ctxt->dst.count = (rc->end - rc->pos) / size;
rc->pos = rc->end;
} else {
memcpy(dest, rc->data + rc->pos, size);
rc->pos += size;
}
return 1; return 1;
} }
...@@ -1546,6 +1553,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt) ...@@ -1546,6 +1553,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE) if (rc != X86EMUL_CONTINUE)
return rc; return rc;
break; break;
case OP_MEM_STR:
rc = segmented_write(ctxt,
ctxt->dst.addr.mem,
ctxt->dst.data,
ctxt->dst.bytes * ctxt->dst.count);
if (rc != X86EMUL_CONTINUE)
return rc;
break;
case OP_XMM: case OP_XMM:
write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
break; break;
...@@ -2793,7 +2808,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, ...@@ -2793,7 +2808,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
struct operand *op) struct operand *op)
{ {
int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
...@@ -3733,7 +3748,7 @@ static const struct opcode opcode_table[256] = { ...@@ -3733,7 +3748,7 @@ static const struct opcode opcode_table[256] = {
I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
I(SrcImmByte | Mov | Stack, em_push), I(SrcImmByte | Mov | Stack, em_push),
I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */ I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
/* 0x70 - 0x7F */ /* 0x70 - 0x7F */
X16(D(SrcImmByte)), X16(D(SrcImmByte)),
...@@ -3991,6 +4006,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, ...@@ -3991,6 +4006,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
op->addr.mem.seg = VCPU_SREG_ES; op->addr.mem.seg = VCPU_SREG_ES;
op->val = 0; op->val = 0;
op->count = 1;
break; break;
case OpDX: case OpDX:
op->type = OP_REG; op->type = OP_REG;
...@@ -4034,6 +4050,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, ...@@ -4034,6 +4050,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
op->addr.mem.seg = seg_override(ctxt); op->addr.mem.seg = seg_override(ctxt);
op->val = 0; op->val = 0;
op->count = 1;
break; break;
case OpImmFAddr: case OpImmFAddr:
op->type = OP_IMM; op->type = OP_IMM;
...@@ -4575,8 +4592,14 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) ...@@ -4575,8 +4592,14 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst); string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
if (ctxt->rep_prefix && (ctxt->d & String)) { if (ctxt->rep_prefix && (ctxt->d & String)) {
unsigned int count;
struct read_cache *r = &ctxt->io_read; struct read_cache *r = &ctxt->io_read;
register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); if ((ctxt->d & SrcMask) == SrcSI)
count = ctxt->src.count;
else
count = ctxt->dst.count;
register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
-count);
if (!string_insn_completed(ctxt)) { if (!string_insn_completed(ctxt)) {
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment