Commit 1d966eb4 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "Misc fixes:

   - A rather involved set of memory hardware encryption fixes to
     support the early loading of microcode files via the initrd. These
     are larger than what we normally take at such a late -rc stage, but
     there are two mitigating factors: 1) much of the changes are
     limited to the SME code itself 2) being able to early load
     microcode has increased importance in the post-Meltdown/Spectre
     era.

   - An IRQ vector allocator fix

   - An Intel RDT driver use-after-free fix

   - An APIC driver bug fix/revert to make certain older systems boot
     again

   - A pkeys ABI fix

   - TSC calibration fixes

   - A kdump fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic/vector: Fix off by one in error path
  x86/intel_rdt/cqm: Prevent use after free
  x86/mm: Encrypt the initrd earlier for BSP microcode update
  x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption
  x86/mm: Centralize PMD flags in sme_encrypt_kernel()
  x86/mm: Use a struct to reduce parameters for SME PGD mapping
  x86/mm: Clean up register saving in the __enc_copy() assembly code
  x86/idt: Mark IDT tables __initconst
  Revert "x86/apic: Remove init_bsp_APIC()"
  x86/mm/pkeys: Fix fill_sig_info_pkey
  x86/tsc: Print tsc_khz, when it differs from cpu_khz
  x86/tsc: Fix erroneous TSC rate on Skylake Xeon
  x86/tsc: Future-proof native_calibrate_tsc()
  kdump: Write the correct address of mem_section into vmcoreinfo
parents 9a4ba2ab 45d55e7b
......@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
extern void apic_intr_mode_init(void);
extern void setup_local_APIC(void);
extern void init_apic_mappings(void);
......
......@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
void __init sme_early_init(void);
void __init sme_encrypt_kernel(void);
void __init sme_encrypt_kernel(struct boot_params *bp);
void __init sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
......@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { }
static inline void __init sme_encrypt_kernel(void) { }
static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
static inline void __init sme_enable(struct boot_params *bp) { }
static inline bool sme_active(void) { return false; }
......
......@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void)
return APIC_SYMMETRIC_IO;
}
/*
* An initial setup of the virtual wire mode.
*/
void __init init_bsp_APIC(void)
{
unsigned int value;
/*
* Don't do the setup now if we have a SMP BIOS as the
* through-I/O-APIC virtual wire mode might be active.
*/
if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
return;
/*
* Do not trust the local APIC being empty at bootup.
*/
clear_local_APIC();
/*
* Enable APIC.
*/
value = apic_read(APIC_SPIV);
value &= ~APIC_VECTOR_MASK;
value |= APIC_SPIV_APIC_ENABLED;
#ifdef CONFIG_X86_32
/* This bit is reserved on P4/Xeon and should be cleared */
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
(boot_cpu_data.x86 == 15))
value &= ~APIC_SPIV_FOCUS_DISABLED;
else
#endif
value |= APIC_SPIV_FOCUS_DISABLED;
value |= SPURIOUS_APIC_VECTOR;
apic_write(APIC_SPIV, value);
/*
* Set up the virtual wire mode.
*/
apic_write(APIC_LVT0, APIC_DM_EXTINT);
value = APIC_DM_NMI;
if (!lapic_is_integrated()) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
if (apic_extnmi == APIC_EXTNMI_NONE)
value |= APIC_LVT_MASKED;
apic_write(APIC_LVT1, value);
}
/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
......
......@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
err = assign_irq_vector_policy(irqd, info);
trace_vector_setup(virq + i, false, err);
if (err)
if (err) {
irqd->chip_data = NULL;
free_apic_chip_data(apicd);
goto error;
}
}
return 0;
error:
x86_vector_free_irqs(domain, virq, i + 1);
x86_vector_free_irqs(domain, virq, i);
return err;
}
......
......@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
*/
if (static_branch_unlikely(&rdt_mon_enable_key))
rmdir_mondata_subdir_allrdtgrp(r, d->id);
kfree(d->ctrl_val);
kfree(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
list_del(&d->list);
if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
......@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
kfree(d->ctrl_val);
kfree(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
kfree(d);
return;
}
......
......@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
p = fixup_pointer(&phys_base, physaddr);
*p += load_delta - sme_get_me_mask();
/* Encrypt the kernel (if SME is active) */
sme_encrypt_kernel();
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
/*
* Return the SME encryption mask (if SME is active) to be used as a
......
......@@ -56,7 +56,7 @@ struct idt_data {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
static const __initdata struct idt_data early_idts[] = {
static const __initconst struct idt_data early_idts[] = {
INTG(X86_TRAP_DB, debug),
SYSG(X86_TRAP_BP, int3),
#ifdef CONFIG_X86_32
......@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
* the traps which use them are reinitialized with IST after cpu_init() has
* set up TSS.
*/
static const __initdata struct idt_data def_idts[] = {
static const __initconst struct idt_data def_idts[] = {
INTG(X86_TRAP_DE, divide_error),
INTG(X86_TRAP_NMI, nmi),
INTG(X86_TRAP_BR, bounds),
......@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
/*
* The APIC and SMP idt entries
*/
static const __initdata struct idt_data apic_idts[] = {
static const __initconst struct idt_data apic_idts[] = {
#ifdef CONFIG_SMP
INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
......@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
static const __initdata struct idt_data early_pf_idts[] = {
static const __initconst struct idt_data early_pf_idts[] = {
INTG(X86_TRAP_PF, page_fault),
};
......@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
* Override for the debug_idt. Same as the default, but with interrupt
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
*/
static const __initdata struct idt_data dbg_idts[] = {
static const __initconst struct idt_data dbg_idts[] = {
INTG(X86_TRAP_DB, debug),
INTG(X86_TRAP_BP, int3),
};
......@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
* The exceptions which use Interrupt stacks. They are setup after
* cpu_init() when the TSS has been initialized.
*/
static const __initdata struct idt_data ist_idts[] = {
static const __initconst struct idt_data ist_idts[] = {
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
SISTG(X86_TRAP_BP, int3, DEBUG_STACK),
......
......@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void)
struct irq_chip *chip = legacy_pic->chip;
int i;
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
init_bsp_APIC();
#endif
legacy_pic->init(0);
for (i = 0; i < nr_legacy_irqs(); i++)
......
......@@ -364,16 +364,6 @@ static void __init reserve_initrd(void)
!ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */
/*
* If SME is active, this memory will be marked encrypted by the
* kernel when it is accessed (including relocation). However, the
* ramdisk image was loaded decrypted by the bootloader, so make
* sure that it is encrypted before accessing it. For SEV the
* ramdisk will already be encrypted, so only do this for SME.
*/
if (sme_active())
sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
initrd_start = 0;
mapped_size = memblock_mem_size(max_pfn_mapped);
......
......@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void)
case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */
break;
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ATOM_DENVERTON:
crystal_khz = 25000; /* 25.0 MHz */
break;
......@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void)
}
}
if (crystal_khz == 0)
return 0;
/*
* TSC frequency determined by CPUID is a "hardware reported"
* frequency and is the most accurate one so far we have. This
......@@ -1315,6 +1316,12 @@ void __init tsc_init(void)
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
if (cpu_khz != tsc_khz) {
pr_info("Detected %lu.%03lu MHz TSC",
(unsigned long)tsc_khz / 1000,
(unsigned long)tsc_khz % 1000);
}
/* Sanitize TSC ADJUST before cyc2ns gets initialized */
tsc_store_and_check_tsc_adjust(true);
......
......@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return;
/* Fault not from Protection Keys: nothing to do */
if (si_code != SEGV_PKUERR)
if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
return;
/*
* force_sig_info_fault() is called from a number of
......@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
fill_sig_info_pkey(si_code, &info, pkey);
fill_sig_info_pkey(si_signo, si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
......
This diff is collapsed.
......@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
/*
* Entry parameters:
* RDI - virtual address for the encrypted kernel mapping
* RSI - virtual address for the decrypted kernel mapping
* RDX - length of kernel
* RDI - virtual address for the encrypted mapping
* RSI - virtual address for the decrypted mapping
* RDX - length to encrypt
* RCX - virtual address of the encryption workarea, including:
* - stack page (PAGE_SIZE)
* - encryption routine page (PAGE_SIZE)
......@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
addq $PAGE_SIZE, %rax /* Workarea encryption routine */
push %r12
movq %rdi, %r10 /* Encrypted kernel */
movq %rsi, %r11 /* Decrypted kernel */
movq %rdx, %r12 /* Kernel length */
movq %rdi, %r10 /* Encrypted area */
movq %rsi, %r11 /* Decrypted area */
movq %rdx, %r12 /* Area length */
/* Copy encryption routine into the workarea */
movq %rax, %rdi /* Workarea encryption routine */
......@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
rep movsb
/* Setup registers for call */
movq %r10, %rdi /* Encrypted kernel */
movq %r11, %rsi /* Decrypted kernel */
movq %r10, %rdi /* Encrypted area */
movq %r11, %rsi /* Decrypted area */
movq %r8, %rdx /* Pagetables used for encryption */
movq %r12, %rcx /* Kernel length */
movq %r12, %rcx /* Area length */
movq %rax, %r8 /* Workarea encryption routine */
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
......@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
ENTRY(__enc_copy)
/*
* Routine used to encrypt kernel.
* Routine used to encrypt memory in place.
* This routine must be run outside of the kernel proper since
* the kernel will be encrypted during the process. So this
* routine is defined here and then copied to an area outside
......@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
* during execution.
*
* On entry the registers must be:
* RDI - virtual address for the encrypted kernel mapping
* RSI - virtual address for the decrypted kernel mapping
* RDI - virtual address for the encrypted mapping
* RSI - virtual address for the decrypted mapping
* RDX - address of the pagetables to use for encryption
* RCX - length of kernel
* RCX - length of area
* R8 - intermediate copy buffer
*
* RAX - points to this routine
*
* The kernel will be encrypted by copying from the non-encrypted
* kernel space to an intermediate buffer and then copying from the
* intermediate buffer back to the encrypted kernel space. The physical
* addresses of the two kernel space mappings are the same which
* results in the kernel being encrypted "in place".
* The area will be encrypted by copying from the non-encrypted
* memory space to an intermediate buffer and then copying from the
* intermediate buffer back to the encrypted memory space. The physical
* addresses of the two mappings are the same which results in the area
* being encrypted "in place".
*/
/* Enable the new page tables */
mov %rdx, %cr3
......@@ -103,47 +103,55 @@ ENTRY(__enc_copy)
orq $X86_CR4_PGE, %rdx
mov %rdx, %cr4
push %r15
push %r12
movq %rcx, %r9 /* Save area length */
movq %rdi, %r10 /* Save encrypted area address */
movq %rsi, %r11 /* Save decrypted area address */
/* Set the PAT register PA5 entry to write-protect */
push %rcx
movl $MSR_IA32_CR_PAT, %ecx
rdmsr
push %rdx /* Save original PAT value */
mov %rdx, %r15 /* Save original PAT value */
andl $0xffff00ff, %edx /* Clear PA5 */
orl $0x00000500, %edx /* Set PA5 to WP */
wrmsr
pop %rdx /* RDX contains original PAT value */
pop %rcx
movq %rcx, %r9 /* Save kernel length */
movq %rdi, %r10 /* Save encrypted kernel address */
movq %rsi, %r11 /* Save decrypted kernel address */
wbinvd /* Invalidate any cache entries */
/* Copy/encrypt 2MB at a time */
/* Copy/encrypt up to 2MB at a time */
movq $PMD_PAGE_SIZE, %r12
1:
movq %r11, %rsi /* Source - decrypted kernel */
cmpq %r12, %r9
jnb 2f
movq %r9, %r12
2:
movq %r11, %rsi /* Source - decrypted area */
movq %r8, %rdi /* Dest - intermediate copy buffer */
movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
movq %r12, %rcx
rep movsb
movq %r8, %rsi /* Source - intermediate copy buffer */
movq %r10, %rdi /* Dest - encrypted kernel */
movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
movq %r10, %rdi /* Dest - encrypted area */
movq %r12, %rcx
rep movsb
addq $PMD_PAGE_SIZE, %r11
addq $PMD_PAGE_SIZE, %r10
subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */
addq %r12, %r11
addq %r12, %r10
subq %r12, %r9 /* Kernel length decrement */
jnz 1b /* Kernel length not zero? */
/* Restore PAT register */
push %rdx /* Save original PAT value */
movl $MSR_IA32_CR_PAT, %ecx
rdmsr
pop %rdx /* Restore original PAT value */
mov %r15, %rdx /* Restore original PAT value */
wrmsr
pop %r12
pop %r15
ret
.L__enc_copy_end:
ENDPROC(__enc_copy)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment