Commit b6444bd0 authored by Linus Torvalds

Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot and percpu updates from Ingo Molnar:
 "This tree contains a bootable images documentation update plus three
  slightly misplaced x86/asm percpu changes/optimizations"

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86-64: Use RIP-relative addressing for most per-CPU accesses
  x86-64: Handle PC-relative relocations on per-CPU data
  x86: Convert a few more per-CPU items to read-mostly ones
  x86, boot: Document intermediates more clearly
parents 9d0cf6f5 97b67ae5
@@ -3,6 +3,18 @@
 #
 # create a compressed vmlinux image from the original vmlinux
 #
+# vmlinuz is:
+#	decompression code (*.o)
+#	asm globals (piggy.S), including:
+#		vmlinux.bin.(gz|bz2|lzma|...)
+#
+# vmlinux.bin is:
+#	vmlinux stripped of debugging and comments
+# vmlinux.bin.all is:
+#	vmlinux.bin + vmlinux.relocs
+# vmlinux.bin.(gz|bz2|lzma|...) is:
+#	(see scripts/Makefile.lib size_append)
+#	compressed vmlinux.bin.all + u32 size of vmlinux.bin.all
 
 targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
 	vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
......
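The new comment block above ends at size_append in scripts/Makefile.lib, which appends the uncompressed size of vmlinux.bin.all to the compressed payload as a u32 (assumed little-endian here, per the usual size_append byte order). A minimal standalone sketch of reading that trailer back from the host side; the file name is illustrative, and the real consumer is the in-kernel decompression stub:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Illustrative input; any vmlinux.bin.(gz|bz2|lzma|...) works. */
	FILE *f = fopen("vmlinux.bin.gz", "rb");
	uint8_t b[4];

	if (!f || fseek(f, -4L, SEEK_END) != 0 || fread(b, 1, 4, f) != 4)
		return 1;

	/* size_append stores the u32 little-endian regardless of host. */
	printf("uncompressed size: %u bytes\n",
	       b[0] | b[1] << 8 | (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24);
	fclose(f);
	return 0;
}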
@@ -260,7 +260,7 @@ static void handle_relocations(void *output, unsigned long output_len)
 	/*
 	 * Process relocations: 32 bit relocations first then 64 bit after.
-	 * Two sets of binary relocations are added to the end of the kernel
+	 * Three sets of binary relocations are added to the end of the kernel
 	 * before compression. Each relocation table entry is the kernel
 	 * address of the location which needs to be updated stored as a
 	 * 32-bit value which is sign extended to 64 bits.
@@ -270,6 +270,8 @@ static void handle_relocations(void *output, unsigned long output_len)
 	 *	kernel bits...
 	 *	0 - zero terminator for 64 bit relocations
 	 *	64 bit relocation repeated
+	 *	0 - zero terminator for inverse 32 bit relocations
+	 *	32 bit inverse relocation repeated
 	 *	0 - zero terminator for 32 bit relocations
 	 *	32 bit relocation repeated
 	 *
@@ -286,6 +288,16 @@ static void handle_relocations(void *output, unsigned long output_len)
 		*(uint32_t *)ptr += delta;
 	}
 #ifdef CONFIG_X86_64
+	while (*--reloc) {
+		long extended = *reloc;
+		extended += map;
+
+		ptr = (unsigned long)extended;
+		if (ptr < min_addr || ptr > max_addr)
+			error("inverse 32-bit relocation outside of kernel!\n");
+
+		*(int32_t *)ptr -= delta;
+	}
 	for (reloc--; *reloc; reloc--) {
 		long extended = *reloc;
 		extended += map;
......
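The new while loop walks the inverse table that now sits between the 64-bit and 32-bit relocations: a 32-bit RIP-relative displacement aimed at a per-CPU symbol (linked near address 0) must shrink by delta when the referencing instruction moves up by delta, the mirror image of the += delta applied to ordinary entries. A self-contained toy model of the two fixups, with purely illustrative addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t delta  = 0x200000;	/* load address minus link address */
	uint64_t rip    = 0xffffffff81001000ULL; /* referencing instruction */
	uint64_t target = 0x1000;	/* per-CPU symbol, linked near 0 */

	/* The assembled 32-bit displacement wraps modulo 2^32. */
	int32_t disp = (int32_t)(uint32_t)(target - rip);

	/* Ordinary 64-bit relocation: absolute pointers move with the
	 * kernel (*ptr += delta). Inverse 32-bit relocation: the per-CPU
	 * target stays put while the instruction moves up, so the
	 * displacement shrinks (*ptr -= delta). */
	rip  += delta;
	disp -= (int32_t)delta;

	printf("disp after fixup: %#x\n", (uint32_t)disp);
	printf("rip + disp == target: %s\n",
	       rip + (uint64_t)(int64_t)disp == target ? "yes" : "no");
	return 0;
}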
@@ -64,7 +64,7 @@
 #define __percpu_prefix		""
 #endif
 
-#define __percpu_arg(x)		__percpu_prefix "%P" #x
+#define __percpu_arg(x)		__percpu_prefix "%" #x
 
 /*
  * Initialized pointers to per-cpu variables needed for the boot
@@ -179,29 +179,58 @@ do {									\
 	}								\
 } while (0)
 
-#define percpu_from_op(op, var, constraint)		\
+#define percpu_from_op(op, var)				\
 ({							\
 	typeof(var) pfo_ret__;				\
 	switch (sizeof(var)) {				\
 	case 1:						\
 		asm(op "b "__percpu_arg(1)",%0"		\
 		    : "=q" (pfo_ret__)			\
-		    : constraint);			\
+		    : "m" (var));			\
 		break;					\
 	case 2:						\
 		asm(op "w "__percpu_arg(1)",%0"		\
 		    : "=r" (pfo_ret__)			\
-		    : constraint);			\
+		    : "m" (var));			\
 		break;					\
 	case 4:						\
 		asm(op "l "__percpu_arg(1)",%0"		\
 		    : "=r" (pfo_ret__)			\
-		    : constraint);			\
+		    : "m" (var));			\
 		break;					\
 	case 8:						\
 		asm(op "q "__percpu_arg(1)",%0"		\
 		    : "=r" (pfo_ret__)			\
-		    : constraint);			\
+		    : "m" (var));			\
+		break;					\
+	default: __bad_percpu_size();			\
+	}						\
+	pfo_ret__;					\
+})
+
+#define percpu_stable_op(op, var)			\
+({							\
+	typeof(var) pfo_ret__;				\
+	switch (sizeof(var)) {				\
+	case 1:						\
+		asm(op "b "__percpu_arg(P1)",%0"	\
+		    : "=q" (pfo_ret__)			\
+		    : "p" (&(var)));			\
+		break;					\
+	case 2:						\
+		asm(op "w "__percpu_arg(P1)",%0"	\
+		    : "=r" (pfo_ret__)			\
+		    : "p" (&(var)));			\
+		break;					\
+	case 4:						\
+		asm(op "l "__percpu_arg(P1)",%0"	\
+		    : "=r" (pfo_ret__)			\
+		    : "p" (&(var)));			\
+		break;					\
+	case 8:						\
+		asm(op "q "__percpu_arg(P1)",%0"	\
+		    : "=r" (pfo_ret__)			\
+		    : "p" (&(var)));			\
 		break;					\
 	default: __bad_percpu_size();			\
 	}						\
@@ -359,11 +388,11 @@ do {									\
  * per-thread variables implemented as per-cpu variables and thus
  * stable for the duration of the respective task.
  */
-#define this_cpu_read_stable(var)	percpu_from_op("mov", var, "p" (&(var)))
+#define this_cpu_read_stable(var)	percpu_stable_op("mov", var)
 
-#define raw_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
-#define raw_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
-#define raw_cpu_read_4(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_read_1(pcp)		percpu_from_op("mov", pcp)
+#define raw_cpu_read_2(pcp)		percpu_from_op("mov", pcp)
+#define raw_cpu_read_4(pcp)		percpu_from_op("mov", pcp)
 #define raw_cpu_write_1(pcp, val)	percpu_to_op("mov", (pcp), val)
 #define raw_cpu_write_2(pcp, val)	percpu_to_op("mov", (pcp), val)
@@ -381,9 +410,9 @@ do {									\
 #define raw_cpu_xchg_2(pcp, val)	percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_4(pcp, val)	percpu_xchg_op(pcp, val)
 
-#define this_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
-#define this_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
-#define this_cpu_read_4(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_read_1(pcp)		percpu_from_op("mov", pcp)
+#define this_cpu_read_2(pcp)		percpu_from_op("mov", pcp)
+#define this_cpu_read_4(pcp)		percpu_from_op("mov", pcp)
 #define this_cpu_write_1(pcp, val)	percpu_to_op("mov", (pcp), val)
 #define this_cpu_write_2(pcp, val)	percpu_to_op("mov", (pcp), val)
 #define this_cpu_write_4(pcp, val)	percpu_to_op("mov", (pcp), val)
@@ -435,7 +464,7 @@ do {									\
  * 32 bit must fall back to generic operations.
  */
 #ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_read_8(pcp)		percpu_from_op("mov", pcp)
 #define raw_cpu_write_8(pcp, val)	percpu_to_op("mov", (pcp), val)
 #define raw_cpu_add_8(pcp, val)		percpu_add_op((pcp), val)
 #define raw_cpu_and_8(pcp, val)		percpu_to_op("and", (pcp), val)
@@ -444,7 +473,7 @@ do {									\
 #define raw_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
 #define raw_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 
-#define this_cpu_read_8(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_read_8(pcp)		percpu_from_op("mov", pcp)
 #define this_cpu_write_8(pcp, val)	percpu_to_op("mov", (pcp), val)
 #define this_cpu_add_8(pcp, val)	percpu_add_op((pcp), val)
 #define this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
@@ -522,7 +551,7 @@ static inline int x86_this_cpu_variable_test_bit(int nr,
 #include <asm-generic/percpu.h>
 
 /* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
+DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
 
 #endif	/* !__ASSEMBLY__ */
......
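The heart of the percpu.h change is the constraint swap visible above: percpu_from_op now passes the variable as a genuine "m" memory operand with an unmodified "%1", letting GCC pick the addressing mode (on x86-64 typically a RIP-relative one), while the new percpu_stable_op keeps the old "p" (&(var)) plus "%P1" form, which hands the compiler a constant address whose load it may treat as stable and combine across uses; that is exactly what this_cpu_read_stable wants. A userspace sketch of the same two idioms, minus the %%gs segment prefix (names are illustrative; on PIE-by-default toolchains the "p" variant may need -no-pie to link):

#include <stdio.h>

static unsigned long var = 42;

static unsigned long read_m(void)
{
	unsigned long ret;

	/* "m" + plain %1: the compiler chooses the addressing mode,
	 * e.g. var(%rip) on x86-64, and sees a real memory input. */
	asm("movq %1,%0" : "=r" (ret) : "m" (var));
	return ret;
}

static unsigned long read_p(void)
{
	unsigned long ret;

	/* "p" + %P1: the operand is a constant address printed bare
	 * (movq var,%0); with no memory input visible, the compiler
	 * may fold repeated reads into one. */
	asm("movq %P1,%0" : "=r" (ret) : "p" (&var));
	return ret;
}

int main(void)
{
	printf("%lu %lu\n", read_m(), read_p());
	return 0;
}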
@@ -127,7 +127,7 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 	u32			microcode;
-} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+};
 
 #define X86_VENDOR_INTEL	0
 #define X86_VENDOR_CYRIX	1
@@ -151,7 +151,7 @@ extern __u32 cpu_caps_cleared[NCAPINTS];
 extern __u32 cpu_caps_set[NCAPINTS];
 
 #ifdef CONFIG_SMP
-DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
+DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 #define cpu_data(cpu)		per_cpu(cpu_info, cpu)
 #else
 #define cpu_info		boot_cpu_data
......
@@ -30,7 +30,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
 #define BOOT_PERCPU_OFFSET 0
 #endif
 
-DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
......
@@ -99,7 +99,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 
 /* Per CPU bogomips and other parameters */
-DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
+DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 atomic_t init_deasserted;
......
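These two hunks are the read-mostly conversion named in the merge summary: this_cpu_off and cpu_info are written essentially once during boot and then only read, so placing them in the per-CPU read-mostly subsection keeps them off cache lines dirtied by frequently written per-CPU data. this_cpu_off is also what per-CPU pointer arithmetic feeds on; a simplified sketch of that use (my_this_cpu_ptr is a made-up name, and the real x86 implementation is %gs-based assembly):

/* Sketch: turn a per-CPU symbol's link-time address (near 0) into
 * this CPU's instance by adding this CPU's offset. */
#define my_this_cpu_ptr(var)						\
	((typeof(var) *)((unsigned long)&(var) +			\
			 this_cpu_read_stable(this_cpu_off)))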
@@ -186,6 +186,8 @@ SECTIONS
 	 * start another segment - init.
 	 */
 	PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
+	ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
+	       "per-CPU data too large - increase CONFIG_PHYSICAL_START")
 #endif
 
 	INIT_TEXT_SECTION(PAGE_SIZE)
......
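The ASSERT added above encodes the constraint the scheme appears to rest on: the per-CPU section is linked at virtual address 0 (the PERCPU_VADDR(..., 0, :percpu) line just before it), while genuine kernel addresses start at CONFIG_PHYSICAL_START, so zero-based per-CPU addresses stay distinguishable only while the section ends below that boundary. A trivial C analogue of the same build-time check, with made-up numbers (the real check runs in the linker):

/* Illustrative stand-ins for SIZEOF(.data..percpu) and
 * CONFIG_PHYSICAL_START. */
#define PERCPU_SIZE_EXAMPLE	0x200000UL	/* 2 MiB of per-CPU data */
#define PHYS_START_EXAMPLE	0x1000000UL	/* 16 MiB kernel start   */

_Static_assert(PERCPU_SIZE_EXAMPLE < PHYS_START_EXAMPLE,
	       "per-CPU data too large - increase CONFIG_PHYSICAL_START");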
@@ -20,7 +20,10 @@ struct relocs {
 
 static struct relocs relocs16;
 static struct relocs relocs32;
+#if ELF_BITS == 64
+static struct relocs relocs32neg;
 static struct relocs relocs64;
+#endif
 
 struct section {
 	Elf_Shdr       shdr;
@@ -762,11 +765,16 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
 
 	switch (r_type) {
 	case R_X86_64_NONE:
+		/* NONE can be ignored. */
+		break;
+
 	case R_X86_64_PC32:
 		/*
-		 * NONE can be ignored and PC relative relocations don't
-		 * need to be adjusted.
+		 * PC relative relocations don't need to be adjusted unless
+		 * referencing a percpu symbol.
 		 */
+		if (is_percpu_sym(sym, symname))
+			add_reloc(&relocs32neg, offset);
 		break;
 
 	case R_X86_64_32:
@@ -986,7 +994,10 @@ static void emit_relocs(int as_text, int use_real_mode)
 	/* Order the relocations for more efficient processing */
 	sort_relocs(&relocs16);
 	sort_relocs(&relocs32);
+#if ELF_BITS == 64
+	sort_relocs(&relocs32neg);
 	sort_relocs(&relocs64);
+#endif
 
 	/* Print the relocations */
 	if (as_text) {
@@ -1007,14 +1018,21 @@ static void emit_relocs(int as_text, int use_real_mode)
 		for (i = 0; i < relocs32.count; i++)
 			write_reloc(relocs32.offset[i], stdout);
 	} else {
-		if (ELF_BITS == 64) {
-			/* Print a stop */
-			write_reloc(0, stdout);
-
-			/* Now print each relocation */
-			for (i = 0; i < relocs64.count; i++)
-				write_reloc(relocs64.offset[i], stdout);
-		}
+#if ELF_BITS == 64
+		/* Print a stop */
+		write_reloc(0, stdout);
+
+		/* Now print each relocation */
+		for (i = 0; i < relocs64.count; i++)
+			write_reloc(relocs64.offset[i], stdout);
+
+		/* Print a stop */
+		write_reloc(0, stdout);
+
+		/* Now print each inverse 32-bit relocation */
+		for (i = 0; i < relocs32neg.count; i++)
+			write_reloc(relocs32neg.offset[i], stdout);
+#endif
 
 		/* Print a stop */
 		write_reloc(0, stdout);
......
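In the hunks above, do_reloc64 diverts R_X86_64_PC32 relocations against per-CPU symbols into the new relocs32neg table, and emit_relocs writes that table out between the 64-bit and 32-bit ones, matching the layout the decompressor's comment describes. The is_percpu_sym predicate lies outside the quoted context; a plausible reconstruction, assuming a cached section index for the per-CPU section and a few boot-time symbols that must keep ordinary treatment, would look like:

/* Sketch only: per_cpu_shndx would be recorded while scanning the
 * section headers; the named symbols live at per-CPU addresses but
 * are referenced before the per-CPU area exists, so they are exempt. */
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
{
	return (sym->st_shndx == per_cpu_shndx) &&
		strcmp(symname, "__init_begin") &&
		strcmp(symname, "__per_cpu_load") &&
		strncmp(symname, "init_per_cpu_", 13);
}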