Commit 6ea98b4b authored by Linus Torvalds

Merge branch 'x86-alternatives-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 alternative instruction updates from Ingo Molnar:
 "Small RDTSCP optimization, enabled by the newly added ALTERNATIVE_3(),
  and other small improvements"

* 'x86-alternatives-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/TSC: Use RDTSCP
  x86/alternatives: Add an ALTERNATIVE_3() macro
  x86/alternatives: Print containing function
  x86/alternatives: Add macro comments
parents 45802da0 093ae8f9
...@@ -94,13 +94,12 @@ static inline int alternatives_text_reserved(void *start, void *end) ...@@ -94,13 +94,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alt_total_slen alt_end_marker"b-661b" #define alt_total_slen alt_end_marker"b-661b"
#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
#define __OLDINSTR(oldinstr, num) \ #define OLDINSTR(oldinstr, num) \
"# ALT: oldnstr\n" \
"661:\n\t" oldinstr "\n662:\n" \ "661:\n\t" oldinstr "\n662:\n" \
"# ALT: padding\n" \
".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
"((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \
#define OLDINSTR(oldinstr, num) \
__OLDINSTR(oldinstr, num) \
alt_end_marker ":\n" alt_end_marker ":\n"
/* /*
...@@ -116,11 +115,23 @@ static inline int alternatives_text_reserved(void *start, void *end) ...@@ -116,11 +115,23 @@ static inline int alternatives_text_reserved(void *start, void *end)
* additionally longer than the first replacement alternative. * additionally longer than the first replacement alternative.
*/ */
#define OLDINSTR_2(oldinstr, num1, num2) \ #define OLDINSTR_2(oldinstr, num1, num2) \
"# ALT: oldinstr2\n" \
"661:\n\t" oldinstr "\n662:\n" \ "661:\n\t" oldinstr "\n662:\n" \
"# ALT: padding2\n" \
".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
alt_end_marker ":\n" alt_end_marker ":\n"
#define OLDINSTR_3(oldinsn, n1, n2, n3) \
"# ALT: oldinstr3\n" \
"661:\n\t" oldinsn "\n662:\n" \
"# ALT: padding3\n" \
".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
" - (" alt_slen ")) > 0) * " \
"(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
" - (" alt_slen ")), 0x90\n" \
alt_end_marker ":\n"
#define ALTINSTR_ENTRY(feature, num) \ #define ALTINSTR_ENTRY(feature, num) \
" .long 661b - .\n" /* label */ \ " .long 661b - .\n" /* label */ \
" .long " b_replacement(num)"f - .\n" /* new instruction */ \ " .long " b_replacement(num)"f - .\n" /* new instruction */ \
...@@ -129,8 +140,9 @@ static inline int alternatives_text_reserved(void *start, void *end) ...@@ -129,8 +140,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
" .byte " alt_rlen(num) "\n" /* replacement len */ \ " .byte " alt_rlen(num) "\n" /* replacement len */ \
" .byte " alt_pad_len "\n" /* pad len */ " .byte " alt_pad_len "\n" /* pad len */
#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ #define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" "# ALT: replacement " #num "\n" \
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
/* alternative assembly primitive: */ /* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, feature) \ #define ALTERNATIVE(oldinstr, newinstr, feature) \
...@@ -153,6 +165,19 @@ static inline int alternatives_text_reserved(void *start, void *end) ...@@ -153,6 +165,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
".popsection\n" ".popsection\n"
#define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \
OLDINSTR_3(oldinsn, 1, 2, 3) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feat1, 1) \
ALTINSTR_ENTRY(feat2, 2) \
ALTINSTR_ENTRY(feat3, 3) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinsn1, feat1, 1) \
ALTINSTR_REPLACEMENT(newinsn2, feat2, 2) \
ALTINSTR_REPLACEMENT(newinsn3, feat3, 3) \
".popsection\n"
/* /*
* Alternative instructions for different CPU types or capabilities. * Alternative instructions for different CPU types or capabilities.
* *
......
...@@ -217,6 +217,8 @@ static __always_inline unsigned long long rdtsc(void) ...@@ -217,6 +217,8 @@ static __always_inline unsigned long long rdtsc(void)
*/ */
static __always_inline unsigned long long rdtsc_ordered(void) static __always_inline unsigned long long rdtsc_ordered(void)
{ {
DECLARE_ARGS(val, low, high);
/* /*
* The RDTSC instruction is not ordered relative to memory * The RDTSC instruction is not ordered relative to memory
* access. The Intel SDM and the AMD APM are both vague on this * access. The Intel SDM and the AMD APM are both vague on this
...@@ -227,9 +229,19 @@ static __always_inline unsigned long long rdtsc_ordered(void) ...@@ -227,9 +229,19 @@ static __always_inline unsigned long long rdtsc_ordered(void)
* ordering guarantees as reading from a global memory location * ordering guarantees as reading from a global memory location
* that some other imaginary CPU is updating continuously with a * that some other imaginary CPU is updating continuously with a
* time stamp. * time stamp.
*
* Thus, use the preferred barrier on the respective CPU, aiming for
* RDTSCP as the default.
*/ */
barrier_nospec(); asm volatile(ALTERNATIVE_3("rdtsc",
return rdtsc(); "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC,
"lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
"rdtscp", X86_FEATURE_RDTSCP)
: EAX_EDX_RET(val, low, high)
/* RDTSCP clobbers ECX with MSR_TSC_AUX. */
:: "ecx");
return EAX_EDX_VAL(val, low, high);
} }
static inline unsigned long long native_read_pmc(int counter) static inline unsigned long long native_read_pmc(int counter)
......
...@@ -394,10 +394,10 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, ...@@ -394,10 +394,10 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
continue; continue;
} }
DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
a->cpuid >> 5, a->cpuid >> 5,
a->cpuid & 0x1f, a->cpuid & 0x1f,
instr, a->instrlen, instr, instr, a->instrlen,
replacement, a->replacementlen, a->padlen); replacement, a->replacementlen, a->padlen);
DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment