Commit a8ebfcd3 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "RISC-V:
   - Fix missing PAGE_PFN_MASK

   - Fix SRCU deadlock caused by kvm_riscv_check_vcpu_requests()

  x86:
   - Fix for nested virtualization when TSC scaling is active

   - Estimate the size of fastcc subroutines conservatively, avoiding
     disastrous underestimation when return thunks are enabled

   - Avoid possible use of uninitialized fields of 'struct
     kvm_lapic_irq'

  Generic:
   - Mark as such the boolean values available from the statistics file
     descriptors

   - Clarify statistics documentation"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: emulate: do not adjust size of fastop and setcc subroutines
  KVM: x86: Fully initialize 'struct kvm_lapic_irq' in kvm_pv_kick_cpu_op()
  Documentation: kvm: clarify histogram units
  kvm: stats: tell userspace which values are boolean
  x86/kvm: fix FASTOP_SIZE when return thunks are enabled
  KVM: nVMX: Always enable TSC scaling for L2 when it was enabled for L1
  RISC-V: KVM: Fix SRCU deadlock caused by kvm_riscv_check_vcpu_requests()
  riscv: Fix missing PAGE_PFN_MASK
parents 1ce9d792 79629181
...@@ -5657,6 +5657,7 @@ by a string of size ``name_size``. ...@@ -5657,6 +5657,7 @@ by a string of size ``name_size``.
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
#define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_SHIFT 8
...@@ -5702,14 +5703,13 @@ Bits 0-3 of ``flags`` encode the type: ...@@ -5702,14 +5703,13 @@ Bits 0-3 of ``flags`` encode the type:
by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``) by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``)
is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last
bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity
value.) The bucket value indicates how many samples fell in the bucket's range. value.)
* ``KVM_STATS_TYPE_LOG_HIST`` * ``KVM_STATS_TYPE_LOG_HIST``
The statistic is reported as a logarithmic histogram. The number of The statistic is reported as a logarithmic histogram. The number of
buckets is specified by the ``size`` field. The range of the first bucket is buckets is specified by the ``size`` field. The range of the first bucket is
[0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF). [0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF).
Otherwise, The Nth bucket (1 < N < ``size``) covers Otherwise, The Nth bucket (1 < N < ``size``) covers
[pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell [pow(2, N-2), pow(2, N-1)).
in the bucket's range.
Bits 4-7 of ``flags`` encode the unit: Bits 4-7 of ``flags`` encode the unit:
...@@ -5724,6 +5724,15 @@ Bits 4-7 of ``flags`` encode the unit: ...@@ -5724,6 +5724,15 @@ Bits 4-7 of ``flags`` encode the unit:
It indicates that the statistics data is used to measure time or latency. It indicates that the statistics data is used to measure time or latency.
* ``KVM_STATS_UNIT_CYCLES`` * ``KVM_STATS_UNIT_CYCLES``
It indicates that the statistics data is used to measure CPU clock cycles. It indicates that the statistics data is used to measure CPU clock cycles.
* ``KVM_STATS_UNIT_BOOLEAN``
It indicates that the statistic will always be either 0 or 1. Boolean
statistics of "peak" type will never go back from 1 to 0. Boolean
statistics can be linear histograms (with two buckets) but not logarithmic
histograms.
Note that, in the case of histograms, the unit applies to the bucket
ranges, while the bucket value indicates how many samples fell in the
bucket's range.
Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the
unit: unit:
...@@ -5746,7 +5755,7 @@ the corresponding statistics data. ...@@ -5746,7 +5755,7 @@ the corresponding statistics data.
The ``bucket_size`` field is used as a parameter for histogram statistics data. The ``bucket_size`` field is used as a parameter for histogram statistics data.
It is only used by linear histogram statistics data, specifying the size of a It is only used by linear histogram statistics data, specifying the size of a
bucket. bucket in the unit expressed by bits 4-11 of ``flags`` together with ``exponent``.
The ``name`` field is the name string of the statistics data. The name string The ``name`` field is the name string of the statistics data. The name string
starts at the end of ``struct kvm_stats_desc``. The maximum length including starts at the end of ``struct kvm_stats_desc``. The maximum length including
......
...@@ -175,7 +175,7 @@ static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) ...@@ -175,7 +175,7 @@ static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
static inline unsigned long _pud_pfn(pud_t pud) static inline unsigned long _pud_pfn(pud_t pud)
{ {
return pud_val(pud) >> _PAGE_PFN_SHIFT; return __page_val_to_pfn(pud_val(pud));
} }
static inline pmd_t *pud_pgtable(pud_t pud) static inline pmd_t *pud_pgtable(pud_t pud)
...@@ -278,13 +278,13 @@ static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot) ...@@ -278,13 +278,13 @@ static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
static inline unsigned long _p4d_pfn(p4d_t p4d) static inline unsigned long _p4d_pfn(p4d_t p4d)
{ {
return p4d_val(p4d) >> _PAGE_PFN_SHIFT; return __page_val_to_pfn(p4d_val(p4d));
} }
static inline pud_t *p4d_pgtable(p4d_t p4d) static inline pud_t *p4d_pgtable(p4d_t p4d)
{ {
if (pgtable_l4_enabled) if (pgtable_l4_enabled)
return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT); return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d)));
return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
} }
...@@ -292,7 +292,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) ...@@ -292,7 +292,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
static inline struct page *p4d_page(p4d_t p4d) static inline struct page *p4d_page(p4d_t p4d)
{ {
return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT); return pfn_to_page(__page_val_to_pfn(p4d_val(p4d)));
} }
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
...@@ -347,7 +347,7 @@ static inline void pgd_clear(pgd_t *pgd) ...@@ -347,7 +347,7 @@ static inline void pgd_clear(pgd_t *pgd)
static inline p4d_t *pgd_pgtable(pgd_t pgd) static inline p4d_t *pgd_pgtable(pgd_t pgd)
{ {
if (pgtable_l5_enabled) if (pgtable_l5_enabled)
return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT); return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd)));
return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
} }
...@@ -355,7 +355,7 @@ static inline p4d_t *pgd_pgtable(pgd_t pgd) ...@@ -355,7 +355,7 @@ static inline p4d_t *pgd_pgtable(pgd_t pgd)
static inline struct page *pgd_page(pgd_t pgd) static inline struct page *pgd_page(pgd_t pgd)
{ {
return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT); return pfn_to_page(__page_val_to_pfn(pgd_val(pgd)));
} }
#define pgd_page(pgd) pgd_page(pgd) #define pgd_page(pgd) pgd_page(pgd)
......
...@@ -261,7 +261,7 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot) ...@@ -261,7 +261,7 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
static inline unsigned long _pgd_pfn(pgd_t pgd) static inline unsigned long _pgd_pfn(pgd_t pgd)
{ {
return pgd_val(pgd) >> _PAGE_PFN_SHIFT; return __page_val_to_pfn(pgd_val(pgd));
} }
static inline struct page *pmd_page(pmd_t pmd) static inline struct page *pmd_page(pmd_t pmd)
...@@ -590,14 +590,14 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) ...@@ -590,14 +590,14 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE)); return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
} }
#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) #define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT)
static inline unsigned long pmd_pfn(pmd_t pmd) static inline unsigned long pmd_pfn(pmd_t pmd)
{ {
return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT); return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
} }
#define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) #define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT)
static inline unsigned long pud_pfn(pud_t pud) static inline unsigned long pud_pfn(pud_t pud)
{ {
......
...@@ -54,7 +54,7 @@ static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) ...@@ -54,7 +54,7 @@ static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
static inline unsigned long gstage_pte_page_vaddr(pte_t pte) static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
{ {
return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT); return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
} }
static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
......
...@@ -781,9 +781,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) ...@@ -781,9 +781,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_request_pending(vcpu)) { if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
kvm_vcpu_srcu_read_unlock(vcpu);
rcuwait_wait_event(wait, rcuwait_wait_event(wait,
(!vcpu->arch.power_off) && (!vcpu->arch.pause), (!vcpu->arch.power_off) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE); TASK_INTERRUPTIBLE);
kvm_vcpu_srcu_read_lock(vcpu);
if (vcpu->arch.power_off || vcpu->arch.pause) { if (vcpu->arch.power_off || vcpu->arch.pause) {
/* /*
......
...@@ -189,9 +189,6 @@ ...@@ -189,9 +189,6 @@
#define X8(x...) X4(x), X4(x) #define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x) #define X16(x...) X8(x), X8(x)
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT))
struct opcode { struct opcode {
u64 flags; u64 flags;
u8 intercept; u8 intercept;
...@@ -306,9 +303,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) ...@@ -306,9 +303,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
* Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
* different operand sizes can be reached by calculation, rather than a jump * different operand sizes can be reached by calculation, rather than a jump
* table (which would be bigger than the code). * table (which would be bigger than the code).
*
* The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
* and 1 for the straight line speculation INT3, leaves 7 bytes for the
* body of the function. Currently none is larger than 4.
*/ */
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
#define FASTOP_SIZE 16
#define __FOP_FUNC(name) \ #define __FOP_FUNC(name) \
".align " __stringify(FASTOP_SIZE) " \n\t" \ ".align " __stringify(FASTOP_SIZE) " \n\t" \
".type " name ", @function \n\t" \ ".type " name ", @function \n\t" \
...@@ -442,11 +445,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); ...@@ -442,11 +445,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
* RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK]
* INT3 [1 byte; CONFIG_SLS] * INT3 [1 byte; CONFIG_SLS]
*/ */
#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ #define SETCC_ALIGN 16
IS_ENABLED(CONFIG_SLS))
#define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH)
#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1))
static_assert(SETCC_LENGTH <= SETCC_ALIGN);
#define FOP_SETCC(op) \ #define FOP_SETCC(op) \
".align " __stringify(SETCC_ALIGN) " \n\t" \ ".align " __stringify(SETCC_ALIGN) " \n\t" \
......
...@@ -2278,7 +2278,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 ...@@ -2278,7 +2278,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_ENABLE_VMFUNC | SECONDARY_EXEC_ENABLE_VMFUNC |
SECONDARY_EXEC_TSC_SCALING |
SECONDARY_EXEC_DESC); SECONDARY_EXEC_DESC);
if (nested_cpu_has(vmcs12, if (nested_cpu_has(vmcs12,
......
...@@ -298,7 +298,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { ...@@ -298,7 +298,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, directed_yield_successful), STATS_DESC_COUNTER(VCPU, directed_yield_successful),
STATS_DESC_COUNTER(VCPU, preemption_reported), STATS_DESC_COUNTER(VCPU, preemption_reported),
STATS_DESC_COUNTER(VCPU, preemption_other), STATS_DESC_COUNTER(VCPU, preemption_other),
STATS_DESC_ICOUNTER(VCPU, guest_mode) STATS_DESC_IBOOLEAN(VCPU, guest_mode)
}; };
const struct kvm_stats_header kvm_vcpu_stats_header = { const struct kvm_stats_header kvm_vcpu_stats_header = {
...@@ -9143,15 +9143,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, ...@@ -9143,15 +9143,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
*/ */
static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid)
{ {
struct kvm_lapic_irq lapic_irq; /*
* All other fields are unused for APIC_DM_REMRD, but may be consumed by
lapic_irq.shorthand = APIC_DEST_NOSHORT; * common code, e.g. for tracing. Defer initialization to the compiler.
lapic_irq.dest_mode = APIC_DEST_PHYSICAL; */
lapic_irq.level = 0; struct kvm_lapic_irq lapic_irq = {
lapic_irq.dest_id = apicid; .delivery_mode = APIC_DM_REMRD,
lapic_irq.msi_redir_hint = false; .dest_mode = APIC_DEST_PHYSICAL,
.shorthand = APIC_DEST_NOSHORT,
.dest_id = apicid,
};
lapic_irq.delivery_mode = APIC_DM_REMRD;
kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
} }
......
...@@ -1822,6 +1822,15 @@ struct _kvm_stats_desc { ...@@ -1822,6 +1822,15 @@ struct _kvm_stats_desc {
STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \
KVM_STATS_BASE_POW10, 0) KVM_STATS_BASE_POW10, 0)
/* Instantaneous boolean value, read only */
#define STATS_DESC_IBOOLEAN(SCOPE, name) \
STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \
KVM_STATS_BASE_POW10, 0)
/* Peak (sticky) boolean value, read/write */
#define STATS_DESC_PBOOLEAN(SCOPE, name) \
STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \
KVM_STATS_BASE_POW10, 0)
/* Cumulative time in nanosecond */ /* Cumulative time in nanosecond */
#define STATS_DESC_TIME_NSEC(SCOPE, name) \ #define STATS_DESC_TIME_NSEC(SCOPE, name) \
STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
...@@ -1853,7 +1862,7 @@ struct _kvm_stats_desc { ...@@ -1853,7 +1862,7 @@ struct _kvm_stats_desc {
HALT_POLL_HIST_COUNT), \ HALT_POLL_HIST_COUNT), \
STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \
HALT_POLL_HIST_COUNT), \ HALT_POLL_HIST_COUNT), \
STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking) STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
extern struct dentry *kvm_debugfs_dir; extern struct dentry *kvm_debugfs_dir;
......
...@@ -2083,6 +2083,7 @@ struct kvm_stats_header { ...@@ -2083,6 +2083,7 @@ struct kvm_stats_header {
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
#define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_SHIFT 8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment