Commit b5df1b3a authored by Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "The main changes are the PCID fixes from Andy, but there's also two
  hyperv fixes and two paravirt updates"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/hyper-v: Remove duplicated HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED definition
  x86/hyper-V: Allocate the IDT entry early in boot
  paravirt: Switch maintainer
  x86/paravirt: Remove no longer used paravirt functions
  x86/mm/64: Initialize CR4.PCIDE early
  x86/hibernate/64: Mask off CR3's PCID bits in the saved CR3
  x86/mm: Get rid of VM_BUG_ON in switch_tlb_irqs_off()
parents 9888e4d4 1278f58c
@@ -10135,7 +10135,7 @@ F: include/uapi/linux/ppdev.h
 F: Documentation/parport*.txt
 
 PARAVIRT_OPS INTERFACE
-M: Jeremy Fitzhardinge <jeremy@goop.org>
+M: Juergen Gross <jgross@suse.com>
 M: Chris Wright <chrisw@sous-sol.org>
 M: Alok Kataria <akataria@vmware.com>
 M: Rusty Russell <rusty@rustcorp.com.au>
@@ -10143,7 +10143,7 @@ L: virtualization@lists.linux-foundation.org
 S: Supported
 F: Documentation/virtual/paravirt_ops.txt
 F: arch/*/kernel/paravirt*
-F: arch/*/include/asm/paravirt.h
+F: arch/*/include/asm/paravirt*.h
 F: include/linux/hypervisor.h
 
 PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
...
@@ -121,7 +121,6 @@ static inline int desc_empty(const void *ptr)
 #define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
 
 #define store_gdt(dtr) native_store_gdt(dtr)
-#define store_idt(dtr) native_store_idt(dtr)
 #define store_tr(tr) (tr = native_store_tr())
 
 #define load_TLS(t, cpu) native_load_tls(t, cpu)
@@ -228,7 +227,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
         asm volatile("sgdt %0":"=m" (*dtr));
 }
 
-static inline void native_store_idt(struct desc_ptr *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
 {
         asm volatile("sidt %0":"=m" (*dtr));
 }
...
@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
         PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
-static inline unsigned long __read_cr4(void)
-{
-        return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
-}
-
 static inline void __write_cr4(unsigned long x)
 {
         PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
 {
         PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
 }
-static inline void store_idt(struct desc_ptr *dtr)
-{
-        PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
-}
 static inline unsigned long paravirt_store_tr(void)
 {
         return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
         PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
 }
 
-static inline void pte_update(struct mm_struct *mm, unsigned long addr,
-                              pte_t *ptep)
-{
-        PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
-}
-
 static inline pte_t __pte(pteval_t val)
 {
         pteval_t ret;
@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
         PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
 }
 
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-                              pmd_t *pmdp, pmd_t pmd)
-{
-        if (sizeof(pmdval_t) > sizeof(long))
-                /* 5 arg words */
-                pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
-        else
-                PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
-                            native_pmd_val(pmd));
-}
-
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
-                              pud_t *pudp, pud_t pud)
-{
-        if (sizeof(pudval_t) > sizeof(long))
-                /* 5 arg words */
-                pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
-        else
-                PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
-                            native_pud_val(pud));
-}
-
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
         pmdval_t val = native_pmd_val(pmd);
...
@@ -107,7 +107,6 @@ struct pv_cpu_ops {
         unsigned long (*read_cr0)(void);
         void (*write_cr0)(unsigned long);
 
-        unsigned long (*read_cr4)(void);
         void (*write_cr4)(unsigned long);
 
 #ifdef CONFIG_X86_64
@@ -119,8 +118,6 @@ struct pv_cpu_ops {
         void (*load_tr_desc)(void);
         void (*load_gdt)(const struct desc_ptr *);
         void (*load_idt)(const struct desc_ptr *);
-        /* store_gdt has been removed. */
-        void (*store_idt)(struct desc_ptr *);
         void (*set_ldt)(const void *desc, unsigned entries);
         unsigned long (*store_tr)(void);
         void (*load_tls)(struct thread_struct *t, unsigned int cpu);
@@ -245,12 +242,6 @@ struct pv_mmu_ops {
         void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
                            pte_t *ptep, pte_t pteval);
         void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
-        void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
-                           pmd_t *pmdp, pmd_t pmdval);
-        void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
-                           pud_t *pudp, pud_t pudval);
-        void (*pte_update)(struct mm_struct *mm, unsigned long addr,
-                           pte_t *ptep);
 
         pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
                                         pte_t *ptep);
...
@@ -55,8 +55,6 @@ extern pmdval_t early_pmd_flags;
 #else  /* !CONFIG_PARAVIRT */
 #define set_pte(ptep, pte) native_set_pte(ptep, pte)
 #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
-#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd)
-#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud)
 
 #define set_pte_atomic(ptep, pte) \
         native_set_pte_atomic(ptep, pte)
@@ -87,8 +85,6 @@ extern pmdval_t early_pmd_flags;
 #define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
 #define pmd_clear(pmd) native_pmd_clear(pmd)
 
-#define pte_update(mm, addr, ptep) do { } while (0)
-
 #define pgd_val(x) native_pgd_val(x)
 #define __pgd(x) native_make_pgd(x)
@@ -979,31 +975,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
         native_set_pte(ptep, pte);
 }
 
-static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
-                                     pmd_t *pmdp , pmd_t pmd)
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+                              pmd_t *pmdp, pmd_t pmd)
 {
         native_set_pmd(pmdp, pmd);
 }
 
-static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
-                                     pud_t *pudp, pud_t pud)
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+                              pud_t *pudp, pud_t pud)
 {
         native_set_pud(pudp, pud);
 }
 
-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces.  It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables.  Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush.
- */
-#define pte_update(mm, addr, ptep) do { } while (0)
-#endif
-
 /*
  * We only update the dirty/accessed state if we set
  * the dirty bit by hand in the kernel, since the hardware
@@ -1031,7 +1014,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                                        pte_t *ptep)
 {
         pte_t pte = native_ptep_get_and_clear(ptep);
-        pte_update(mm, addr, ptep);
         return pte;
 }
@@ -1058,7 +1040,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
                                       unsigned long addr, pte_t *ptep)
 {
         clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
-        pte_update(mm, addr, ptep);
 }
 
 #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
...
@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)
 
 extern asmlinkage void native_load_gs_index(unsigned);
 
+static inline unsigned long __read_cr4(void)
+{
+        return native_read_cr4();
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
         native_write_cr3(x);
 }
 
-static inline unsigned long __read_cr4(void)
-{
-        return native_read_cr4();
-}
-
 static inline void __write_cr4(unsigned long x)
 {
         native_write_cr4(x);
...
@@ -152,12 +152,6 @@
 /* Recommend using the newer ExProcessorMasks interface */
 #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
 
-/*
- * HV_VP_SET available
- */
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
-
 /*
  * Crash notification flag.
  */
...
@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
 __setup("nompx", x86_mpx_setup);
 
 #ifdef CONFIG_X86_64
-static int __init x86_pcid_setup(char *s)
+static int __init x86_nopcid_setup(char *s)
 {
-        /* require an exact match without trailing characters */
-        if (strlen(s))
-                return 0;
+        /* nopcid doesn't accept parameters */
+        if (s)
+                return -EINVAL;
 
         /* do not emit a message if the feature is not present */
         if (!boot_cpu_has(X86_FEATURE_PCID))
-                return 1;
+                return 0;
 
         setup_clear_cpu_cap(X86_FEATURE_PCID);
         pr_info("nopcid: PCID feature disabled\n");
-        return 1;
+        return 0;
 }
-__setup("nopcid", x86_pcid_setup);
+early_param("nopcid", x86_nopcid_setup);
 #endif
 
 static int __init x86_noinvpcid_setup(char *s)
@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
         }
 }
 
-static void setup_pcid(struct cpuinfo_x86 *c)
-{
-        if (cpu_has(c, X86_FEATURE_PCID)) {
-                if (cpu_has(c, X86_FEATURE_PGE)) {
-                        /*
-                         * We'd like to use cr4_set_bits_and_update_boot(),
-                         * but we can't.  CR4.PCIDE is special and can only
-                         * be set in long mode, and the early CPU init code
-                         * doesn't know this and would try to restore CR4.PCIDE
-                         * prior to entering long mode.
-                         *
-                         * Instead, we rely on the fact that hotplug, resume,
-                         * etc all fully restore CR4 before they write anything
-                         * that could have nonzero PCID bits to CR3.  CR4.PCIDE
-                         * has no effect on the page tables themselves, so we
-                         * don't need it to be restored early.
-                         */
-                        cr4_set_bits(X86_CR4_PCIDE);
-                } else {
-                        /*
-                         * flush_tlb_all(), as currently implemented, won't
-                         * work if PCID is on but PGE is not.  Since that
-                         * combination doesn't exist on real hardware, there's
-                         * no reason to try to fully support it, but it's
-                         * polite to avoid corrupting data if we're on
-                         * an improperly configured VM.
-                         */
-                        clear_cpu_cap(c, X86_FEATURE_PCID);
-                }
-        }
-}
-
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
         setup_smep(c);
         setup_smap(c);
 
-        /* Set up PCID */
-        setup_pcid(c);
-
         /*
          * The vendor-specific functions might have changed features.
          * Now we do "generic changes."
...
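A note on the hunk above: the switch from __setup() to early_param() changes the handler's calling convention, not just its name. A __setup() handler receives the text following the option and returns 1 to mark it consumed, and it is only parsed later in start_kernel(); an early_param() handler runs from parse_early_param() during setup_arch(), receives NULL for a bare flag, and returns 0 on success. Since PCID is now configured from init_mem_mapping(), "nopcid" has to be an early param to take effect in time. A minimal userspace sketch of the two conventions (illustrative only, not kernel code; the function names and printouts are made up):

    #include <stdio.h>
    #include <string.h>

    /* __setup() convention: arg is the text after the option; return 1 = consumed. */
    static int nopcid_setup_old(char *s)
    {
            if (strlen(s))          /* reject trailing characters like "nopcid=foo" */
                    return 0;
            puts("PCID disabled (parsed late, __setup style)");
            return 1;
    }

    /* early_param() convention: arg is NULL for a bare flag; return 0 = success. */
    static int nopcid_setup_early(char *s)
    {
            if (s)                  /* "nopcid" takes no argument */
                    return -22;     /* -EINVAL */
            puts("PCID disabled (parsed in setup_arch, early_param style)");
            return 0;
    }

    int main(void)
    {
            nopcid_setup_old("");      /* bare "nopcid" on the command line */
            nopcid_setup_early(NULL);  /* same option via the early path */
            return 0;
    }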
@@ -59,8 +59,6 @@ void hyperv_vector_handler(struct pt_regs *regs)
 void hv_setup_vmbus_irq(void (*handler)(void))
 {
         vmbus_handler = handler;
-        /* Setup the IDT for hypervisor callback */
-        alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 }
 
 void hv_remove_vmbus_irq(void)
@@ -251,6 +249,8 @@ static void __init ms_hyperv_init_platform(void)
          */
         x86_platform.apic_post_init = hyperv_init;
         hyperv_setup_mmu_ops();
+        /* Setup the IDT for hypervisor callback */
+        alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 #endif
 }
...
@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
         .set_debugreg = native_set_debugreg,
         .read_cr0 = native_read_cr0,
         .write_cr0 = native_write_cr0,
-        .read_cr4 = native_read_cr4,
         .write_cr4 = native_write_cr4,
 #ifdef CONFIG_X86_64
         .read_cr8 = native_read_cr8,
@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
         .set_ldt = native_set_ldt,
         .load_gdt = native_load_gdt,
         .load_idt = native_load_idt,
-        .store_idt = native_store_idt,
         .store_tr = native_store_tr,
         .load_tls = native_load_tls,
 #ifdef CONFIG_X86_64
@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
         .set_pte = native_set_pte,
         .set_pte_at = native_set_pte_at,
         .set_pmd = native_set_pmd,
-        .set_pmd_at = native_set_pmd_at,
-        .pte_update = paravirt_nop,
 
         .ptep_modify_prot_start = __ptep_modify_prot_start,
         .ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
         .pmd_clear = native_pmd_clear,
 #endif
         .set_pud = native_set_pud,
-        .set_pud_at = native_set_pud_at,
 
         .pmd_val = PTE_IDENT,
         .make_pmd = PTE_IDENT,
...
@@ -1178,8 +1178,11 @@ void __init setup_arch(char **cmdline_p)
          * with the current CR4 value.  This may not be necessary, but
          * auditing all the early-boot CR4 manipulation would be needed to
          * rule it out.
+         *
+         * Mask off features that don't work outside long mode (just
+         * PCIDE for now).
          */
-        mmu_cr4_features = __read_cr4();
+        mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;
 
         memblock_set_current_limit(get_max_mapped());
...
@@ -226,10 +226,12 @@ static int enable_start_cpu0;
 static void notrace start_secondary(void *unused)
 {
         /*
-         * Don't put *anything* before cpu_init(), SMP booting is too
-         * fragile that we want to limit the things done here to the
-         * most necessary things.
+         * Don't put *anything* except direct CPU state initialization
+         * before cpu_init(), SMP booting is too fragile that we want to
+         * limit the things done here to the most necessary things.
          */
+        if (boot_cpu_has(X86_FEATURE_PCID))
+                __write_cr4(__read_cr4() | X86_CR4_PCIDE);
         cpu_init();
         x86_cpuinit.early_percpu_clock_init();
         preempt_disable();
...
@@ -5192,7 +5192,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
         vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
         vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-        native_store_idt(&dt);
+        store_idt(&dt);
         vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
         vmx->host_idt_base = dt.address;
...
@@ -19,6 +19,7 @@
 #include <asm/microcode.h>
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
+#include <asm/cpufeature.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
         }
 }
 
+static void setup_pcid(void)
+{
+#ifdef CONFIG_X86_64
+        if (boot_cpu_has(X86_FEATURE_PCID)) {
+                if (boot_cpu_has(X86_FEATURE_PGE)) {
+                        /*
+                         * This can't be cr4_set_bits_and_update_boot() --
+                         * the trampoline code can't handle CR4.PCIDE and
+                         * it wouldn't do any good anyway.  Despite the name,
+                         * cr4_set_bits_and_update_boot() doesn't actually
+                         * cause the bits in question to remain set all the
+                         * way through the secondary boot asm.
+                         *
+                         * Instead, we brute-force it and set CR4.PCIDE
+                         * manually in start_secondary().
+                         */
+                        cr4_set_bits(X86_CR4_PCIDE);
+                } else {
+                        /*
+                         * flush_tlb_all(), as currently implemented, won't
+                         * work if PCID is on but PGE is not.  Since that
+                         * combination doesn't exist on real hardware, there's
+                         * no reason to try to fully support it, but it's
+                         * polite to avoid corrupting data if we're on
+                         * an improperly configured VM.
+                         */
+                        setup_clear_cpu_cap(X86_FEATURE_PCID);
+                }
+        }
+#endif
+}
+
 #ifdef CONFIG_X86_32
 #define NR_RANGE_MR 3
 #else /* CONFIG_X86_64 */
@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
         unsigned long end;
 
         probe_page_size_mask();
+        setup_pcid();
 
 #ifdef CONFIG_X86_64
         end = max_pfn << PAGE_SHIFT;
...
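The comments in setup_pcid() and start_secondary() both come down to one architectural rule: a MOV to CR4 raises #GP if it would flip PCIDE from 0 to 1 while the CPU is outside long mode or while CR3's PCID field (bits 11:0) is nonzero. A hedged sketch of that rule as a standalone predicate, assuming the Intel SDM semantics; the bit constants match the x86 definitions, but the function itself is illustrative and not a kernel API:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define X86_CR4_PCIDE (1ULL << 17)   /* CR4 bit 17 enables PCIDs */
    #define CR3_PCID_MASK 0xFFFULL       /* CR3 bits 11:0 hold the PCID */

    /* Would this CR4 write fault?  (SDM rule for enabling PCIDE.) */
    static bool cr4_write_would_fault(uint64_t old_cr4, uint64_t new_cr4,
                                      uint64_t cr3, bool in_long_mode)
    {
            bool enabling_pcide = !(old_cr4 & X86_CR4_PCIDE) &&
                                  (new_cr4 & X86_CR4_PCIDE);

            return enabling_pcide &&
                   (!in_long_mode || (cr3 & CR3_PCID_MASK) != 0);
    }

    int main(void)
    {
            /* enabling PCIDE in long mode with PCID == 0: allowed (prints 0) */
            printf("%d\n", cr4_write_would_fault(0, X86_CR4_PCIDE, 0x1234000, true));
            /* same write while CR3 already carries PCID 5: #GP (prints 1) */
            printf("%d\n", cr4_write_would_fault(0, X86_CR4_PCIDE, 0x1234005, true));
            return 0;
    }

This is why the trampoline asm, which runs outside long mode, must never see PCIDE in the CR4 image it restores, and why each secondary CPU sets the bit itself once it is safely in long mode.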
@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 {
         int changed = !pte_same(*ptep, entry);
 
-        if (changed && dirty) {
+        if (changed && dirty)
                 *ptep = entry;
-                pte_update(vma->vm_mm, address, ptep);
-        }
 
         return changed;
 }
@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
                 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
                                          (unsigned long *) &ptep->pte);
 
-        if (ret)
-                pte_update(vma->vm_mm, addr, ptep);
-
         return ret;
 }
...
@@ -121,8 +121,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
          * hypothetical buggy code that directly switches to swapper_pg_dir
          * without going through leave_mm() / switch_mm_irqs_off() or that
          * does something like write_cr3(read_cr3_pa()).
+         *
+         * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
+         * isn't free.
          */
-        VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+        if (WARN_ON_ONCE(__read_cr3() !=
+                         (__sme_pa(real_prev->pgd) | prev_asid))) {
+                /*
+                 * If we were to BUG here, we'd be very likely to kill
+                 * the system so hard that we don't see the call trace.
+                 * Try to recover instead by ignoring the error and doing
+                 * a global flush to minimize the chance of corruption.
+                 *
+                 * (This is far from being a fully correct recovery.
+                 *  Architecturally, the CPU could prefetch something
+                 *  back into an incorrect ASID slot and leave it there
+                 *  to cause trouble down the road.  It's better than
+                 *  nothing, though.)
+                 */
+                __flush_tlb_all();
+        }
+#endif
 
         if (real_prev == next) {
                 VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
...
@@ -295,7 +295,26 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
                 return -EOVERFLOW;
         rdr->jump_address = (unsigned long)restore_registers;
         rdr->jump_address_phys = __pa_symbol(restore_registers);
-        rdr->cr3 = restore_cr3;
+
+        /*
+         * The restore code fixes up CR3 and CR4 in the following sequence:
+         *
+         * [in hibernation asm]
+         * 1. CR3 <= temporary page tables
+         * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
+         * 3. CR3 <= rdr->cr3
+         * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
+         * [in restore_processor_state()]
+         * 5. CR4 <= saved CR4
+         * 6. CR3 <= saved CR3
+         *
+         * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
+         * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
+         * rdr->cr3 needs to point to valid page tables but must not
+         * have any of the PCID bits set.
+         */
+        rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
+
         rdr->magic = RESTORE_MAGIC;
 
         hibernation_e820_save(rdr->e820_digest);
...
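The mask in the hunk above works because, with CR4.PCIDE=1, CR3's low 12 bits hold the PCID while the page-table base is at least 4 KB aligned, so clearing bits 11:0 keeps the pointer valid and only drops the PCID. A small standalone illustration; the 0xFFF mask matches the kernel's CR3_PCID_MASK, but the example CR3 value is made up:

    #include <stdint.h>
    #include <stdio.h>

    #define CR3_PCID_MASK 0xFFFULL   /* CR3 bits 11:0 are the PCID when PCIDE=1 */

    int main(void)
    {
            /* hypothetical saved CR3: page-table base 0x1234000, PCID 5 */
            uint64_t restore_cr3 = 0x1234000ULL | 0x5;

            /* same operation as the diff: keep the base, drop the PCID bits */
            uint64_t rdr_cr3 = restore_cr3 & ~CR3_PCID_MASK;

            printf("base %#llx, pcid %llu -> stored %#llx\n",
                   (unsigned long long)(restore_cr3 & ~CR3_PCID_MASK),
                   (unsigned long long)(restore_cr3 & CR3_PCID_MASK),
                   (unsigned long long)rdr_cr3);
            return 0;
    }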
@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
         .read_cr0 = xen_read_cr0,
         .write_cr0 = xen_write_cr0,
 
-        .read_cr4 = native_read_cr4,
         .write_cr4 = xen_write_cr4,
 
 #ifdef CONFIG_X86_64
@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
         .alloc_ldt = xen_alloc_ldt,
         .free_ldt = xen_free_ldt,
 
-        .store_idt = native_store_idt,
         .store_tr = xen_store_tr,
 
         .write_ldt_entry = xen_write_ldt_entry,
...
@@ -2409,8 +2409,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
         .flush_tlb_single = xen_flush_tlb_single,
         .flush_tlb_others = xen_flush_tlb_others,
 
-        .pte_update = paravirt_nop,
-
         .pgd_alloc = xen_pgd_alloc,
         .pgd_free = xen_pgd_free,
...