Commit f725492d authored by Linus Torvalds

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "This includes the following changes:

   - cpu_has() cleanups

   - sync_bitops.h modernization to the rmwcc.h facility, similarly to
     bitops.h

   - continued LTO annotations/fixes

   - misc cleanups and smaller cleanups"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/um/vdso: Drop unnecessary cc-ldoption
  x86/vdso: Rename variable to fix -Wshadow warning
  x86/cpu/amd: Exclude 32bit only assembler from 64bit build
  x86/asm: Mark all top level asm statements as .text
  x86/build/vdso: Add FORCE to the build rule of %.so
  x86/asm: Modernize sync_bitops.h
  x86/mm: Convert some slow-path static_cpu_has() callers to boot_cpu_has()
  x86: Convert some slow-path static_cpu_has() callers to boot_cpu_has()
  x86/asm: Clarify static_cpu_has()'s intended use
  x86/uaccess: Fix implicit cast of __user pointer
  x86/cpufeature: Remove __pure attribute from _static_cpu_has()
parents 80e77644 3855f11d
@@ -116,7 +116,7 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE
 targets += vdsox32.lds $(vobjx32s-y)
 $(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
......
@@ -7,7 +7,7 @@
 static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 			 void *stripped_addr, size_t stripped_len,
-			 FILE *outfile, const char *name)
+			 FILE *outfile, const char *image_name)
 {
 	int found_load = 0;
 	unsigned long load_size = -1;  /* Work around bogus warning */
@@ -93,11 +93,12 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 		int k;
 		ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
 			GET_LE(&symtab_hdr->sh_entsize) * i;
-		const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) +
-			GET_LE(&sym->st_name);
+		const char *sym_name = raw_addr +
+			GET_LE(&strtab_hdr->sh_offset) +
+			GET_LE(&sym->st_name);
 		for (k = 0; k < NSYMS; k++) {
-			if (!strcmp(name, required_syms[k].name)) {
+			if (!strcmp(sym_name, required_syms[k].name)) {
 				if (syms[k]) {
 					fail("duplicate symbol %s\n",
 					     required_syms[k].name);
@@ -134,7 +135,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 	if (syms[sym_vvar_start] % 4096)
 		fail("vvar_begin must be a multiple of 4096\n");
-	if (!name) {
+	if (!image_name) {
 		fwrite(stripped_addr, stripped_len, 1, outfile);
 		return;
 	}
@@ -157,7 +158,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 	}
 	fprintf(outfile, "\n};\n\n");
-	fprintf(outfile, "const struct vdso_image %s = {\n", name);
+	fprintf(outfile, "const struct vdso_image %s = {\n", image_name);
 	fprintf(outfile, "\t.data = raw_data,\n");
 	fprintf(outfile, "\t.size = %lu,\n", mapping_size);
 	if (alt_sec) {
......
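The renames above ('name' to 'image_name' for the parameter, 'sym_name' for the loop local) are the -Wshadow fix from the shortlog: the loop-local 'const char *name' shadowed the function parameter, so every use inside the loop silently bound to the inner variable. A minimal standalone sketch of the pattern the warning catches (hypothetical code, not from the kernel):

    #include <stdio.h>

    static void emit(FILE *outfile, const char *name)   /* parameter 'name' */
    {
        for (int i = 0; i < 3; i++) {
            const char *name = "symbol";     /* -Wshadow: shadows the parameter */
            fprintf(outfile, "%s\n", name);  /* binds to the inner 'name' */
        }
    }

Building with 'cc -Wshadow -c sketch.c' reports, roughly, that the declaration of 'name' shadows a parameter; renaming one of the two, as the diff does, removes the ambiguity.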
@@ -156,11 +156,14 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
 #else
 /*
- * Static testing of CPU features. Used the same as boot_cpu_has().
- * These will statically patch the target code for additional
- * performance.
+ * Static testing of CPU features. Used the same as boot_cpu_has(). It
+ * statically patches the target code for additional performance. Use
+ * static_cpu_has() only in fast paths, where every cycle counts. Which
+ * means that the boot_cpu_has() variant is already fast enough for the
+ * majority of cases and you should stick to using it as it is generally
+ * only two instructions: a RIP-relative MOV and a TEST.
  */
-static __always_inline __pure bool _static_cpu_has(u16 bit)
+static __always_inline bool _static_cpu_has(u16 bit)
 {
 	asm_volatile_goto("1: jmp 6f\n"
 		 "2:\n"
......
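The clarified comment above states the rule that drives the conversions throughout this merge: boot_cpu_has() reads the capability bit from boot_cpu_data and generally costs a RIP-relative MOV plus a TEST, while static_cpu_has() patches the branch via alternatives and is reserved for genuinely hot paths. A sketch of the convention (hypothetical functions, assuming <asm/cpufeature.h> semantics in kernel context):

    static int example_probe(void)               /* init/slow path, runs once */
    {
        if (!boot_cpu_has(X86_FEATURE_XSAVES))   /* MOV + TEST is fast enough */
            return -ENODEV;
        return 0;
    }

    static void example_hot_path(void)           /* called per event */
    {
        if (static_cpu_has(X86_FEATURE_XSAVES))  /* patched in place at boot */
            ; /* XSAVES variant */
        else
            ; /* XSAVE variant */
    }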
@@ -253,7 +253,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
 	WARN_ON(system_state != SYSTEM_BOOTING);
-	if (static_cpu_has(X86_FEATURE_XSAVES))
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -275,7 +275,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 	WARN_ON(system_state != SYSTEM_BOOTING);
-	if (static_cpu_has(X86_FEATURE_XSAVES))
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -497,8 +497,7 @@ static inline void fpregs_activate(struct fpu *fpu)
  *  - switch_fpu_finish() restores the new state as
  *    necessary.
  */
-static inline void
-switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
 	if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
......
@@ -14,6 +14,8 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
+#include <asm/rmwcc.h>
+
 #define ADDR (*(volatile long *)addr)
 /**
@@ -29,7 +31,7 @@
  */
 static inline void sync_set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; bts %1,%0"
+	asm volatile("lock; " __ASM_SIZE(bts) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btr %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btr) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -64,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline void sync_change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btc %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btc) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -78,14 +80,9 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
+static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; bts %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr);
 }
 /**
@@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btr %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr);
 }
 /**
@@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btc %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr);
 }
 #define sync_test_bit(nr, addr) test_bit(nr, addr)
......
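The GEN_BINARY_RMWcc() conversions above replace the open-coded LOCK; BTS/BTR/BTC plus SETC sequences with the shared rmwcc.h helper, which extracts the named condition code (here 'c', the carry flag, which holds the bit's previous value) and can use asm goto where the compiler supports it. Roughly what the generated code is equivalent to on a SETcc fallback build (a simplified sketch, not the real macro expansion in arch/x86/include/asm/rmwcc.h):

    static inline bool sync_test_and_set_bit_sketch(long nr,
                                                    volatile unsigned long *addr)
    {
        bool oldbit;

        /* LOCK; BTS sets CF to the bit's previous value; SETC captures it. */
        asm volatile("lock; " __ASM_SIZE(bts) " %2,%0\n\tsetc %1"
                     : "+m" (*addr), "=qm" (oldbit)
                     : "Ir" (nr)
                     : "memory");
        return oldbit;
    }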
@@ -586,7 +586,6 @@ extern void __cmpxchg_wrong_size(void)
 #define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size)	\
 ({									\
 	int __ret = 0;							\
-	__typeof__(ptr) __uval = (uval);				\
 	__typeof__(*(ptr)) __old = (old);				\
 	__typeof__(*(ptr)) __new = (new);				\
 	__uaccess_begin_nospec();					\
@@ -662,7 +661,7 @@ extern void __cmpxchg_wrong_size(void)
 		__cmpxchg_wrong_size();					\
 	}								\
 	__uaccess_end();						\
-	*__uval = __old;						\
+	*(uval) = __old;						\
 	__ret;								\
 })
......
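This two-line change is the "implicit cast of __user pointer" fix from the shortlog: 'ptr' is a user-space pointer while 'uval' is a kernel pointer, so the temporary declared as '__typeof__(ptr) __uval' wrongly inherited the '__user' address-space qualifier, and the final store became an implicit user/kernel pointer cast that sparse flags. Storing through '*(uval)' directly keeps the address spaces distinct. A simplified illustration of the complaint (only meaningful under sparse, which defines '__user' as an address-space attribute):

    int __user *ptr;                 /* user-space pointer */
    int *uval;                       /* kernel pointer receiving the old value */

    __typeof__(ptr) __uval = uval;   /* sparse: incorrect type in initializer
                                        (different address spaces) */
    *uval = 0;                       /* the fixed form stores directly */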
@@ -175,7 +175,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
 	this_cpu_write(cpu_llc_id, node);
 	/* Account for nodes per socket in multi-core-module processors */
-	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
+	if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, val);
 		nodes = ((val >> 3) & 7) + 1;
 	}
......
@@ -82,11 +82,14 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
  * performance at the same time..
  */
 #ifdef CONFIG_X86_32
 extern __visible void vide(void);
-__asm__(".globl vide\n"
+__asm__(".text\n"
+	".globl vide\n"
 	".type vide, @function\n"
 	".align 4\n"
 	"vide: ret\n");
 #endif
 static void init_amd_k5(struct cpuinfo_x86 *c)
 {
......
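The '.text' addition here, like the ones in the kretprobe_trampoline and just_return_func hunks further down, implements "Mark all top level asm statements as .text": with LTO the compiler may emit a top-level asm statement while a different section happens to be active, so a function body defined this way must select its section explicitly rather than assume .text. A minimal standalone sketch of the pattern (hypothetical symbol name; compiles with GCC or Clang on x86 Linux):

    extern void ret_stub(void);   /* hypothetical example symbol */

    asm(
        ".text\n"                 /* pin the code to .text explicitly */
        ".globl ret_stub\n"
        ".type ret_stub, @function\n"
        "ret_stub:\n"
        "\tret\n"
        ".size ret_stub, .-ret_stub\n"
    );

    int main(void)
    {
        ret_stub();
        return 0;
    }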
@@ -83,7 +83,7 @@ unsigned int aperfmperf_get_khz(int cpu)
 	if (!cpu_khz)
 		return 0;
-	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
 		return 0;
 	aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
@@ -99,7 +99,7 @@ void arch_freq_prepare_all(void)
 	if (!cpu_khz)
 		return;
-	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
 		return;
 	for_each_online_cpu(cpu)
@@ -115,7 +115,7 @@ unsigned int arch_freq_get_on_cpu(int cpu)
 	if (!cpu_khz)
 		return 0;
-	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
 		return 0;
 	if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
......
@@ -1668,7 +1668,7 @@ static void setup_getcpu(int cpu)
 	unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
 	struct desc_struct d = { };
-	if (static_cpu_has(X86_FEATURE_RDTSCP))
+	if (boot_cpu_has(X86_FEATURE_RDTSCP))
 		write_rdtscp_aux(cpudata);
 	/* Store CPU and node number in limit. */
......
@@ -528,7 +528,7 @@ static void do_inject(void)
 	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
 	 * Fam10h and later BKDGs.
 	 */
-	if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
+	if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
 	    b == 4 &&
 	    boot_cpu_data.x86 < 0x17) {
 		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
......
@@ -35,11 +35,11 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 		   "fpu_exception\t: %s\n"
 		   "cpuid level\t: %d\n"
 		   "wp\t\t: yes\n",
-		   static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
-		   static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
-		   static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
-		   static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
-		   static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+		   boot_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
+		   boot_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
+		   boot_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
+		   boot_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+		   boot_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
 		   c->cpuid_level);
 }
 #else
......
@@ -716,6 +716,7 @@ NOKPROBE_SYMBOL(kprobe_int3_handler);
  * calls trampoline_handler() runs, which calls the kretprobe's handler.
  */
 asm(
+	".text\n"
 	".global kretprobe_trampoline\n"
 	".type kretprobe_trampoline, @function\n"
 	"kretprobe_trampoline:\n"
......
@@ -113,7 +113,7 @@ static void do_sanity_check(struct mm_struct *mm,
 		 * tables.
 		 */
 		WARN_ON(!had_kernel_mapping);
-		if (static_cpu_has(X86_FEATURE_PTI))
+		if (boot_cpu_has(X86_FEATURE_PTI))
 			WARN_ON(!had_user_mapping);
 	} else {
 		/*
@@ -121,7 +121,7 @@ static void do_sanity_check(struct mm_struct *mm,
 		 * Sync the pgd to the usermode tables.
 		 */
 		WARN_ON(had_kernel_mapping);
-		if (static_cpu_has(X86_FEATURE_PTI))
+		if (boot_cpu_has(X86_FEATURE_PTI))
 			WARN_ON(had_user_mapping);
 	}
 }
@@ -156,7 +156,7 @@ static void map_ldt_struct_to_user(struct mm_struct *mm)
 	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
 	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
-	if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 		set_pmd(u_pmd, *k_pmd);
 }
@@ -181,7 +181,7 @@ static void map_ldt_struct_to_user(struct mm_struct *mm)
 {
 	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
-	if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 		set_pgd(kernel_to_user_pgdp(pgd), *pgd);
 }
@@ -208,7 +208,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 	spinlock_t *ptl;
 	int i, nr_pages;
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return 0;
 	/*
@@ -271,7 +271,7 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
 		return;
 	/* LDT map/unmap is only required for PTI */
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
@@ -311,7 +311,7 @@ static void free_ldt_pgtables(struct mm_struct *mm)
 	unsigned long start = LDT_BASE_ADDR;
 	unsigned long end = LDT_END_ADDR;
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 	tlb_gather_mmu(&tlb, mm, start, end);
......
@@ -121,7 +121,7 @@ DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
 void __init native_pv_lock_init(void)
 {
-	if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		static_branch_disable(&virt_spin_lock_key);
 }
......
@@ -236,7 +236,7 @@ static int get_cpuid_mode(void)
 static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
 {
-	if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+	if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT))
 		return -ENODEV;
 	if (cpuid_enabled)
@@ -670,7 +670,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
 	if (c->x86_vendor != X86_VENDOR_INTEL)
 		return 0;
-	if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR))
+	if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_cpu_has_bug(X86_BUG_MONITOR))
 		return 0;
 	return 1;
......
@@ -121,7 +121,7 @@ void __noreturn machine_real_restart(unsigned int type)
 	write_cr3(real_mode_header->trampoline_pgd);
 	/* Exiting long mode will fail if CR4.PCIDE is set. */
-	if (static_cpu_has(X86_FEATURE_PCID))
+	if (boot_cpu_has(X86_FEATURE_PCID))
 		cr4_clear_bits(X86_CR4_PCIDE);
 #endif
......
@@ -369,7 +369,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 	preempt_disable();
 	tsk->thread.sp0 += 16;
-	if (static_cpu_has(X86_FEATURE_SEP)) {
+	if (boot_cpu_has(X86_FEATURE_SEP)) {
 		tsk->thread.sysenter_cs = 0;
 		refresh_sysenter_cs(&tsk->thread);
 	}
......
@@ -6,6 +6,7 @@
 asmlinkage void just_return_func(void);
 asm(
+	".text\n"
 	".type just_return_func, @function\n"
 	".globl just_return_func\n"
 	"just_return_func:\n"
......
@@ -578,7 +578,7 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
 void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
 {
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
-	if (user && static_cpu_has(X86_FEATURE_PTI))
+	if (user && boot_cpu_has(X86_FEATURE_PTI))
 		pgd = kernel_to_user_pgdp(pgd);
 #endif
 	ptdump_walk_pgd_level_core(m, pgd, false, false);
@@ -591,7 +591,7 @@ void ptdump_walk_user_pgd_level_checkwx(void)
 	pgd_t *pgd = INIT_PGD;
 	if (!(__supported_pte_mask & _PAGE_NX) ||
-	    !static_cpu_has(X86_FEATURE_PTI))
+	    !boot_cpu_has(X86_FEATURE_PTI))
 		return;
 	pr_info("x86/mm: Checking user space page tables\n");
......
@@ -190,7 +190,7 @@ static void pgd_dtor(pgd_t *pgd)
  * when PTI is enabled. We need them to map the per-process LDT into the
  * user-space page-table.
  */
-#define PREALLOCATED_USER_PMDS	 (static_cpu_has(X86_FEATURE_PTI) ? \
+#define PREALLOCATED_USER_PMDS	 (boot_cpu_has(X86_FEATURE_PTI) ? \
 					KERNEL_PGD_PTRS : 0)
 #define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
@@ -292,7 +292,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 	pgdp = kernel_to_user_pgdp(pgdp);
......
@@ -628,7 +628,7 @@ static void pti_set_kernel_image_nonglobal(void)
  */
 void __init pti_init(void)
 {
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 	pr_info("enabled\n");
......
@@ -62,7 +62,7 @@ quiet_cmd_vdso = VDSO $@
 		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
 		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
 GCOV_PROFILE := n
 #
......