Commit 98a96f20 authored by Linus Torvalds

Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 vdso updates from Ingo Molnar:
 "Further simplifications and improvements to the VDSO code, by Andy
  Lutomirski"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86_64/vsyscall: Fix warn_bad_vsyscall log output
  x86/vdso: Set VM_MAYREAD for the vvar vma
  x86, vdso: Get rid of the fake section mechanism
  x86, vdso: Move the vvar area before the vdso text
parents 5637a2a3 53b884ac
...@@ -18,15 +18,15 @@ struct vdso_image { ...@@ -18,15 +18,15 @@ struct vdso_image {
unsigned long alt, alt_len; unsigned long alt, alt_len;
unsigned long sym_end_mapping; /* Total size of the mapping */ long sym_vvar_start; /* Negative offset to the vvar area */
unsigned long sym_vvar_page; long sym_vvar_page;
unsigned long sym_hpet_page; long sym_hpet_page;
unsigned long sym_VDSO32_NOTE_MASK; long sym_VDSO32_NOTE_MASK;
unsigned long sym___kernel_sigreturn; long sym___kernel_sigreturn;
unsigned long sym___kernel_rt_sigreturn; long sym___kernel_rt_sigreturn;
unsigned long sym___kernel_vsyscall; long sym___kernel_vsyscall;
unsigned long sym_VDSO32_SYSENTER_RETURN; long sym_VDSO32_SYSENTER_RETURN;
}; };
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
......
...@@ -81,10 +81,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, ...@@ -81,10 +81,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
if (!show_unhandled_signals) if (!show_unhandled_signals)
return; return;
pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
level, current->comm, task_pid_nr(current), level, current->comm, task_pid_nr(current),
message, regs->ip, regs->cs, message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di); regs->sp, regs->ax, regs->si, regs->di);
} }
static int addr_to_vsyscall_nr(unsigned long addr) static int addr_to_vsyscall_nr(unsigned long addr)
......
...@@ -10,7 +10,7 @@ VDSO32-$(CONFIG_X86_32) := y ...@@ -10,7 +10,7 @@ VDSO32-$(CONFIG_X86_32) := y
VDSO32-$(CONFIG_COMPAT) := y VDSO32-$(CONFIG_COMPAT) := y
# files to link into the vdso # files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
# files to link into kernel # files to link into kernel
obj-y += vma.o obj-y += vma.o
...@@ -37,7 +37,8 @@ vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg) ...@@ -37,7 +37,8 @@ vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
obj-y += $(vdso_img_objs) obj-y += $(vdso_img_objs)
targets += $(vdso_img_cfiles) targets += $(vdso_img_cfiles)
targets += $(vdso_img_sodbg) targets += $(vdso_img_sodbg)
.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) .SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
$(vdso_img-y:%=$(obj)/vdso%.so)
export CPPFLAGS_vdso.lds += -P -C export CPPFLAGS_vdso.lds += -P -C
...@@ -54,10 +55,10 @@ hostprogs-y += vdso2c ...@@ -54,10 +55,10 @@ hostprogs-y += vdso2c
quiet_cmd_vdso2c = VDSO2C $@ quiet_cmd_vdso2c = VDSO2C $@
define cmd_vdso2c define cmd_vdso2c
$(obj)/vdso2c $< $@ $(obj)/vdso2c $< $(<:%.dbg=%) $@
endef endef
$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
$(call if_changed,vdso2c) $(call if_changed,vdso2c)
# #
...@@ -113,6 +114,10 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE ...@@ -113,6 +114,10 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE
targets += vdsox32.lds $(vobjx32s-y) targets += vdsox32.lds $(vobjx32s-y)
$(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg
$(call if_changed,objcopy)
$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
$(call if_changed,vdso) $(call if_changed,vdso)
...@@ -134,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ ...@@ -134,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
targets += vdso32/vdso32.lds targets += vdso32/vdso32.lds
targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
targets += vdso32/vclock_gettime.o vdso32/vdso-fakesections.o targets += vdso32/vclock_gettime.o
$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) $(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
...@@ -156,7 +161,6 @@ $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) ...@@ -156,7 +161,6 @@ $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
$(obj)/vdso32/vdso32.lds \ $(obj)/vdso32/vdso32.lds \
$(obj)/vdso32/vclock_gettime.o \ $(obj)/vdso32/vclock_gettime.o \
$(obj)/vdso32/vdso-fakesections.o \
$(obj)/vdso32/note.o \ $(obj)/vdso32/note.o \
$(obj)/vdso32/%.o $(obj)/vdso32/%.o
$(call if_changed,vdso) $(call if_changed,vdso)
......
/*
* Copyright 2014 Andy Lutomirski
* Subject to the GNU Public License, v.2
*
* String table for loadable section headers. See vdso2c.h for why
* this exists.
*/
/*
 * NUL-separated section names, emitted into the .fake_shstrtab section.
 * The table lists its own section name among the entries, and the last
 * name is terminated by the string literal's implicit trailing NUL.
 */
const char fake_shstrtab[] __attribute__((section(".fake_shstrtab"))) =
	".hash\0" ".dynsym\0" ".dynstr\0"
	".gnu.version\0" ".gnu.version_d\0"
	".dynamic\0" ".rodata\0"
	".fake_shstrtab\0"	/* this table's own section */
	".note\0"
	".eh_frame_hdr\0" ".eh_frame\0"
	".text";
...@@ -18,6 +18,25 @@ ...@@ -18,6 +18,25 @@
SECTIONS SECTIONS
{ {
/*
* User/kernel shared data is before the vDSO. This may be a little
* uglier than putting it after the vDSO, but it avoids issues with
* non-allocatable things that dangle past the end of the PT_LOAD
* segment.
*/
vvar_start = . - 2 * PAGE_SIZE;
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
hpet_page = vvar_start + PAGE_SIZE;
. = SIZEOF_HEADERS; . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text .hash : { *(.hash) } :text
...@@ -74,31 +93,6 @@ SECTIONS ...@@ -74,31 +93,6 @@ SECTIONS
.altinstructions : { *(.altinstructions) } :text .altinstructions : { *(.altinstructions) } :text
.altinstr_replacement : { *(.altinstr_replacement) } :text .altinstr_replacement : { *(.altinstr_replacement) } :text
/*
* The remainder of the vDSO consists of special pages that are
* shared between the kernel and userspace. It needs to be at the
* end so that it doesn't overlap the mapping of the actual
* vDSO image.
*/
. = ALIGN(PAGE_SIZE);
vvar_page = .;
/* Place all vvars at the offsets in asm/vvar.h. */
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
. = vvar_page + PAGE_SIZE;
hpet_page = .;
. = . + PAGE_SIZE;
. = ALIGN(PAGE_SIZE);
end_mapping = .;
/DISCARD/ : { /DISCARD/ : {
*(.discard) *(.discard)
*(.discard.*) *(.discard.*)
......
/*
* vdso2c - A vdso image preparation tool
* Copyright (c) 2014 Andy Lutomirski and others
* Licensed under the GPL v2
*
* vdso2c requires stripped and unstripped input. It would be trivial
* to fully strip the input in here, but, for reasons described below,
* we need to write a section table. Doing this is more or less
* equivalent to dropping all non-allocatable sections, but it's
* easier to let objcopy handle that instead of doing it ourselves.
* If we ever need to do something fancier than what objcopy provides,
* it would be straightforward to add here.
*
* We keep a section table for a few reasons:
*
* The Go runtime had a couple of bugs: it would read the section
* table to try to figure out how many dynamic symbols there were (it
* shouldn't have looked at the section table at all) and, if there
* were no SHT_DYNSYM section table entry, it would use an
* uninitialized value for the number of symbols. An empty DYNSYM
* table would work, but I see no reason not to write a valid one (and
* keep full performance for old Go programs). This hack is only
* needed on x86_64.
*
* The bug was introduced on 2012-08-31 by:
* https://code.google.com/p/go/source/detail?r=56ea40aac72b
* and was fixed on 2014-06-13 by:
* https://code.google.com/p/go/source/detail?r=fc1cd5e12595
*
* Binutils has issues debugging the vDSO: it reads the section table to
* find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
* would break build-id if we removed the section table. Binutils
* also requires that shstrndx != 0. See:
* https://sourceware.org/bugzilla/show_bug.cgi?id=17064
*
* elfutils might not look for PT_NOTE if there is a section table at
* all. I don't know whether this matters for any practical purpose.
*
* For simplicity, rather than hacking up a partial section table, we
* just write a mostly complete one. We omit non-dynamic symbols,
* though, since they're rather large.
*
* Once binutils gets fixed, we might be able to drop this for all but
* the 64-bit vdso, since build-id only works in kernel RPMs, and
* systems that update to new enough kernel RPMs will likely update
* binutils in sync. build-id has never worked for home-built kernel
* RPMs without manual symlinking, and I suspect that no one ever does
* that.
*/
#include <inttypes.h> #include <inttypes.h>
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
...@@ -20,9 +70,9 @@ const char *outfilename; ...@@ -20,9 +70,9 @@ const char *outfilename;
/* Symbols that we need in vdso2c. */ /* Symbols that we need in vdso2c. */
enum { enum {
sym_vvar_start,
sym_vvar_page, sym_vvar_page,
sym_hpet_page, sym_hpet_page,
sym_end_mapping,
sym_VDSO_FAKE_SECTION_TABLE_START, sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END, sym_VDSO_FAKE_SECTION_TABLE_END,
}; };
...@@ -38,9 +88,9 @@ struct vdso_sym { ...@@ -38,9 +88,9 @@ struct vdso_sym {
}; };
struct vdso_sym required_syms[] = { struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true}, [sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true}, [sym_hpet_page] = {"hpet_page", true},
[sym_end_mapping] = {"end_mapping", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = { [sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false "VDSO_FAKE_SECTION_TABLE_START", false
}, },
...@@ -61,7 +111,8 @@ static void fail(const char *format, ...) ...@@ -61,7 +111,8 @@ static void fail(const char *format, ...)
va_start(ap, format); va_start(ap, format);
fprintf(stderr, "Error: "); fprintf(stderr, "Error: ");
vfprintf(stderr, format, ap); vfprintf(stderr, format, ap);
unlink(outfilename); if (outfilename)
unlink(outfilename);
exit(1); exit(1);
va_end(ap); va_end(ap);
} }
...@@ -96,9 +147,11 @@ extern void bad_put_le(void); ...@@ -96,9 +147,11 @@ extern void bad_put_le(void);
#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) #define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
#define BITSFUNC3(name, bits) name##bits #define BITSFUNC3(name, bits, suffix) name##bits##suffix
#define BITSFUNC2(name, bits) BITSFUNC3(name, bits) #define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS) #define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x #define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) #define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
...@@ -112,30 +165,53 @@ extern void bad_put_le(void); ...@@ -112,30 +165,53 @@ extern void bad_put_le(void);
#include "vdso2c.h" #include "vdso2c.h"
#undef ELF_BITS #undef ELF_BITS
static void go(void *addr, size_t len, FILE *outfile, const char *name) static void go(void *raw_addr, size_t raw_len,
void *stripped_addr, size_t stripped_len,
FILE *outfile, const char *name)
{ {
Elf64_Ehdr *hdr = (Elf64_Ehdr *)addr; Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;
if (hdr->e_ident[EI_CLASS] == ELFCLASS64) { if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
go64(addr, len, outfile, name); go64(raw_addr, raw_len, stripped_addr, stripped_len,
outfile, name);
} else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) { } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
go32(addr, len, outfile, name); go32(raw_addr, raw_len, stripped_addr, stripped_len,
outfile, name);
} else { } else {
fail("unknown ELF class\n"); fail("unknown ELF class\n");
} }
} }
/*
 * Map the entire file @name into memory as a private mapping with
 * protection @prot.
 *
 * On return, *addr points at the mapping and *len holds the file size in
 * bytes (as reported by seeking to the end of the file).  Any failure to
 * open, measure or map the file terminates the program through err().
 * The file descriptor is closed before returning; only the mapping is
 * handed back to the caller.
 */
static void map_input(const char *name, void **addr, size_t *len, int prot)
{
	int input_fd;
	off_t file_size;
	void *mapping;

	input_fd = open(name, O_RDONLY);
	if (input_fd == -1)
		err(1, "%s", name);

	/* Seeking to the end yields the size of the file. */
	file_size = lseek(input_fd, 0, SEEK_END);
	if (file_size == (off_t)-1)
		err(1, "lseek");

	mapping = mmap(NULL, file_size, prot, MAP_PRIVATE, input_fd, 0);
	if (mapping == MAP_FAILED)
		err(1, "mmap");

	close(input_fd);

	*len = (size_t)file_size;
	*addr = mapping;
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
int fd; size_t raw_len, stripped_len;
off_t len; void *raw_addr, *stripped_addr;
void *addr;
FILE *outfile; FILE *outfile;
char *name, *tmp; char *name, *tmp;
int namelen; int namelen;
if (argc != 3) { if (argc != 4) {
printf("Usage: vdso2c INPUT OUTPUT\n"); printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
return 1; return 1;
} }
...@@ -143,7 +219,7 @@ int main(int argc, char **argv) ...@@ -143,7 +219,7 @@ int main(int argc, char **argv)
* Figure out the struct name. If we're writing to a .so file, * Figure out the struct name. If we're writing to a .so file,
* generate raw output instead. * generate raw output instead.
*/ */
name = strdup(argv[2]); name = strdup(argv[3]);
namelen = strlen(name); namelen = strlen(name);
if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) { if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
name = NULL; name = NULL;
...@@ -159,26 +235,18 @@ int main(int argc, char **argv) ...@@ -159,26 +235,18 @@ int main(int argc, char **argv)
*tmp = '_'; *tmp = '_';
} }
fd = open(argv[1], O_RDONLY); map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
if (fd == -1) map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
err(1, "%s", argv[1]);
len = lseek(fd, 0, SEEK_END);
if (len == (off_t)-1)
err(1, "lseek");
addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (addr == MAP_FAILED)
err(1, "mmap");
outfilename = argv[2]; outfilename = argv[3];
outfile = fopen(outfilename, "w"); outfile = fopen(outfilename, "w");
if (!outfile) if (!outfile)
err(1, "%s", argv[2]); err(1, "%s", argv[2]);
go(addr, (size_t)len, outfile, name); go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
munmap(addr, len); munmap(raw_addr, raw_len);
munmap(stripped_addr, stripped_len);
fclose(outfile); fclose(outfile);
return 0; return 0;
......
This diff is collapsed.
...@@ -93,7 +93,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) ...@@ -93,7 +93,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{ {
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
struct vm_area_struct *vma; struct vm_area_struct *vma;
unsigned long addr; unsigned long addr, text_start;
int ret = 0; int ret = 0;
static struct page *no_pages[] = {NULL}; static struct page *no_pages[] = {NULL};
static struct vm_special_mapping vvar_mapping = { static struct vm_special_mapping vvar_mapping = {
...@@ -103,26 +103,28 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) ...@@ -103,26 +103,28 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
if (calculate_addr) { if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack, addr = vdso_addr(current->mm->start_stack,
image->sym_end_mapping); image->size - image->sym_vvar_start);
} else { } else {
addr = 0; addr = 0;
} }
down_write(&mm->mmap_sem); down_write(&mm->mmap_sem);
addr = get_unmapped_area(NULL, addr, image->sym_end_mapping, 0, 0); addr = get_unmapped_area(NULL, addr,
image->size - image->sym_vvar_start, 0, 0);
if (IS_ERR_VALUE(addr)) { if (IS_ERR_VALUE(addr)) {
ret = addr; ret = addr;
goto up_fail; goto up_fail;
} }
current->mm->context.vdso = (void __user *)addr; text_start = addr - image->sym_vvar_start;
current->mm->context.vdso = (void __user *)text_start;
/* /*
* MAYWRITE to allow gdb to COW and set breakpoints * MAYWRITE to allow gdb to COW and set breakpoints
*/ */
vma = _install_special_mapping(mm, vma = _install_special_mapping(mm,
addr, text_start,
image->size, image->size,
VM_READ|VM_EXEC| VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
...@@ -134,9 +136,9 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) ...@@ -134,9 +136,9 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
} }
vma = _install_special_mapping(mm, vma = _install_special_mapping(mm,
addr + image->size, addr,
image->sym_end_mapping - image->size, -image->sym_vvar_start,
VM_READ, VM_READ|VM_MAYREAD,
&vvar_mapping); &vvar_mapping);
if (IS_ERR(vma)) { if (IS_ERR(vma)) {
...@@ -146,7 +148,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) ...@@ -146,7 +148,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
if (image->sym_vvar_page) if (image->sym_vvar_page)
ret = remap_pfn_range(vma, ret = remap_pfn_range(vma,
addr + image->sym_vvar_page, text_start + image->sym_vvar_page,
__pa_symbol(&__vvar_page) >> PAGE_SHIFT, __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
PAGE_SIZE, PAGE_SIZE,
PAGE_READONLY); PAGE_READONLY);
...@@ -157,7 +159,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) ...@@ -157,7 +159,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
#ifdef CONFIG_HPET_TIMER #ifdef CONFIG_HPET_TIMER
if (hpet_address && image->sym_hpet_page) { if (hpet_address && image->sym_hpet_page) {
ret = io_remap_pfn_range(vma, ret = io_remap_pfn_range(vma,
addr + image->sym_hpet_page, text_start + image->sym_hpet_page,
hpet_address >> PAGE_SHIFT, hpet_address >> PAGE_SHIFT,
PAGE_SIZE, PAGE_SIZE,
pgprot_noncached(PAGE_READONLY)); pgprot_noncached(PAGE_READONLY));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment