Commit 7b2d81d4 authored by Ingo Molnar

uprobes/core: Clean up, refactor and improve the code

Make the uprobes code readable to me:

 - improve the Kconfig text so that a mere mortal gets some idea
   what CONFIG_UPROBES=y is really about

 - do trivial renames to standardize around the uprobes_*() namespace

 - clean up and simplify various code flow details

 - separate basic blocks of functionality

 - remove line break artifacts and stray white space

 - use standard local variable definition blocks

 - use vertical spacing to make things more readable

 - remove unnecessary volatile

 - restructure comment blocks to make them more uniform and
   more readable in general

Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Anton Arapov <anton@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Link: http://lkml.kernel.org/n/tip-ewbwhb8o6navvllsauu7k07p@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 2b144498
...@@ -66,13 +66,19 @@ config OPTPROBES ...@@ -66,13 +66,19 @@ config OPTPROBES
depends on !PREEMPT depends on !PREEMPT
config UPROBES config UPROBES
bool "User-space probes (EXPERIMENTAL)" bool "Transparent user-space probes (EXPERIMENTAL)"
depends on ARCH_SUPPORTS_UPROBES depends on ARCH_SUPPORTS_UPROBES
default n default n
help help
Uprobes enables kernel subsystems to establish probepoints Uprobes is the user-space counterpart to kprobes: they
in user applications and execute handler functions when enable instrumentation applications (such as 'perf probe')
the probepoints are hit. to establish unintrusive probes in user-space binaries and
libraries, by executing handler functions when the probes
are hit by user-space applications.
( These probes come in the form of single-byte breakpoints,
managed by the kernel and kept transparent to the probed
application. )
If in doubt, say "N". If in doubt, say "N".
......
#ifndef _ASM_UPROBES_H #ifndef _ASM_UPROBES_H
#define _ASM_UPROBES_H #define _ASM_UPROBES_H
/* /*
* Userspace Probes (UProbes) for x86 * User-space Probes (UProbes) for x86
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
*/ */
typedef u8 uprobe_opcode_t; typedef u8 uprobe_opcode_t;
#define MAX_UINSN_BYTES 16 #define MAX_UINSN_BYTES 16
#define UPROBES_XOL_SLOT_BYTES 128 /* to keep it cache aligned */ #define UPROBES_XOL_SLOT_BYTES 128 /* to keep it cache aligned */
...@@ -38,5 +39,5 @@ struct uprobe_arch_info { ...@@ -38,5 +39,5 @@ struct uprobe_arch_info {
}; };
struct uprobe; struct uprobe;
extern int analyze_insn(struct mm_struct *mm, struct uprobe *uprobe); extern int arch_uprobes_analyze_insn(struct mm_struct *mm, struct uprobe *uprobe);
#endif /* _ASM_UPROBES_H */ #endif /* _ASM_UPROBES_H */
/* /*
* Userspace Probes (UProbes) for x86 * User-space Probes (UProbes) for x86
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
* Srikar Dronamraju * Srikar Dronamraju
* Jim Keniston * Jim Keniston
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/ptrace.h> #include <linux/ptrace.h>
...@@ -55,7 +54,7 @@ ...@@ -55,7 +54,7 @@
<< (row % 32)) << (row % 32))
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
static volatile u32 good_insns_64[256 / 32] = { static u32 good_insns_64[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */ /* ---------------------------------------------- */
W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
...@@ -81,7 +80,7 @@ static volatile u32 good_insns_64[256 / 32] = { ...@@ -81,7 +80,7 @@ static volatile u32 good_insns_64[256 / 32] = {
/* Good-instruction tables for 32-bit apps */ /* Good-instruction tables for 32-bit apps */
static volatile u32 good_insns_32[256 / 32] = { static u32 good_insns_32[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */ /* ---------------------------------------------- */
W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
...@@ -105,7 +104,7 @@ static volatile u32 good_insns_32[256 / 32] = { ...@@ -105,7 +104,7 @@ static volatile u32 good_insns_32[256 / 32] = {
}; };
/* Using this for both 64-bit and 32-bit apps */ /* Using this for both 64-bit and 32-bit apps */
static volatile u32 good_2byte_insns[256 / 32] = { static u32 good_2byte_insns[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */ /* ---------------------------------------------- */
W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
...@@ -132,6 +131,7 @@ static volatile u32 good_2byte_insns[256 / 32] = { ...@@ -132,6 +131,7 @@ static volatile u32 good_2byte_insns[256 / 32] = {
/* /*
* opcodes we'll probably never support: * opcodes we'll probably never support:
*
* 6c-6d, e4-e5, ec-ed - in * 6c-6d, e4-e5, ec-ed - in
* 6e-6f, e6-e7, ee-ef - out * 6e-6f, e6-e7, ee-ef - out
* cc, cd - int3, int * cc, cd - int3, int
...@@ -143,24 +143,28 @@ static volatile u32 good_2byte_insns[256 / 32] = { ...@@ -143,24 +143,28 @@ static volatile u32 good_2byte_insns[256 / 32] = {
* 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
* *
* invalid opcodes in 64-bit mode: * invalid opcodes in 64-bit mode:
* 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
* *
* 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
* 63 - we support this opcode in x86_64 but not in i386. * 63 - we support this opcode in x86_64 but not in i386.
* *
* opcodes we may need to refine support for: * opcodes we may need to refine support for:
*
* 0f - 2-byte instructions: For many of these instructions, the validity * 0f - 2-byte instructions: For many of these instructions, the validity
* depends on the prefix and/or the reg field. On such instructions, we * depends on the prefix and/or the reg field. On such instructions, we
* just consider the opcode combination valid if it corresponds to any * just consider the opcode combination valid if it corresponds to any
* valid instruction. * valid instruction.
*
* 8f - Group 1 - only reg = 0 is OK * 8f - Group 1 - only reg = 0 is OK
* c6-c7 - Group 11 - only reg = 0 is OK * c6-c7 - Group 11 - only reg = 0 is OK
* d9-df - fpu insns with some illegal encodings * d9-df - fpu insns with some illegal encodings
* f2, f3 - repnz, repz prefixes. These are also the first byte for * f2, f3 - repnz, repz prefixes. These are also the first byte for
* certain floating-point instructions, such as addsd. * certain floating-point instructions, such as addsd.
*
* fe - Group 4 - only reg = 0 or 1 is OK * fe - Group 4 - only reg = 0 or 1 is OK
* ff - Group 5 - only reg = 0-6 is OK * ff - Group 5 - only reg = 0-6 is OK
* *
* others -- Do we need to support these? * others -- Do we need to support these?
*
* 0f - (floating-point?) prefetch instructions * 0f - (floating-point?) prefetch instructions
* 07, 17, 1f - pop es, pop ss, pop ds * 07, 17, 1f - pop es, pop ss, pop ds
* 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
...@@ -182,11 +186,11 @@ static bool is_prefix_bad(struct insn *insn) ...@@ -182,11 +186,11 @@ static bool is_prefix_bad(struct insn *insn)
for (i = 0; i < insn->prefixes.nbytes; i++) { for (i = 0; i < insn->prefixes.nbytes; i++) {
switch (insn->prefixes.bytes[i]) { switch (insn->prefixes.bytes[i]) {
case 0x26: /*INAT_PFX_ES */ case 0x26: /* INAT_PFX_ES */
case 0x2E: /*INAT_PFX_CS */ case 0x2E: /* INAT_PFX_CS */
case 0x36: /*INAT_PFX_DS */ case 0x36: /* INAT_PFX_DS */
case 0x3E: /*INAT_PFX_SS */ case 0x3E: /* INAT_PFX_SS */
case 0xF0: /*INAT_PFX_LOCK */ case 0xF0: /* INAT_PFX_LOCK */
return true; return true;
} }
} }
...@@ -201,12 +205,15 @@ static int validate_insn_32bits(struct uprobe *uprobe, struct insn *insn) ...@@ -201,12 +205,15 @@ static int validate_insn_32bits(struct uprobe *uprobe, struct insn *insn)
insn_get_opcode(insn); insn_get_opcode(insn);
if (is_prefix_bad(insn)) if (is_prefix_bad(insn))
return -ENOTSUPP; return -ENOTSUPP;
if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
return 0; return 0;
if (insn->opcode.nbytes == 2) { if (insn->opcode.nbytes == 2) {
if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
return 0; return 0;
} }
return -ENOTSUPP; return -ENOTSUPP;
} }
...@@ -282,12 +289,12 @@ static void prepare_fixups(struct uprobe *uprobe, struct insn *insn) ...@@ -282,12 +289,12 @@ static void prepare_fixups(struct uprobe *uprobe, struct insn *insn)
* disastrous. * disastrous.
* *
* Some useful facts about rip-relative instructions: * Some useful facts about rip-relative instructions:
*
* - There's always a modrm byte. * - There's always a modrm byte.
* - There's never a SIB byte. * - There's never a SIB byte.
* - The displacement is always 4 bytes. * - The displacement is always 4 bytes.
*/ */
static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe, static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe, struct insn *insn)
struct insn *insn)
{ {
u8 *cursor; u8 *cursor;
u8 reg; u8 reg;
...@@ -342,13 +349,12 @@ static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe, ...@@ -342,13 +349,12 @@ static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe,
} }
/* Target address = address of next instruction + (signed) offset */ /* Target address = address of next instruction + (signed) offset */
uprobe->arch_info.rip_rela_target_address = (long)insn->length uprobe->arch_info.rip_rela_target_address = (long)insn->length + insn->displacement.value;
+ insn->displacement.value;
/* Displacement field is gone; slide immediate field (if any) over. */ /* Displacement field is gone; slide immediate field (if any) over. */
if (insn->immediate.nbytes) { if (insn->immediate.nbytes) {
cursor++; cursor++;
memmove(cursor, cursor + insn->displacement.nbytes, memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
insn->immediate.nbytes);
} }
return; return;
} }
...@@ -361,8 +367,10 @@ static int validate_insn_64bits(struct uprobe *uprobe, struct insn *insn) ...@@ -361,8 +367,10 @@ static int validate_insn_64bits(struct uprobe *uprobe, struct insn *insn)
insn_get_opcode(insn); insn_get_opcode(insn);
if (is_prefix_bad(insn)) if (is_prefix_bad(insn))
return -ENOTSUPP; return -ENOTSUPP;
if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
return 0; return 0;
if (insn->opcode.nbytes == 2) { if (insn->opcode.nbytes == 2) {
if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
return 0; return 0;
...@@ -370,34 +378,31 @@ static int validate_insn_64bits(struct uprobe *uprobe, struct insn *insn) ...@@ -370,34 +378,31 @@ static int validate_insn_64bits(struct uprobe *uprobe, struct insn *insn)
return -ENOTSUPP; return -ENOTSUPP;
} }
static int validate_insn_bits(struct mm_struct *mm, struct uprobe *uprobe, static int validate_insn_bits(struct mm_struct *mm, struct uprobe *uprobe, struct insn *insn)
struct insn *insn)
{ {
if (mm->context.ia32_compat) if (mm->context.ia32_compat)
return validate_insn_32bits(uprobe, insn); return validate_insn_32bits(uprobe, insn);
return validate_insn_64bits(uprobe, insn); return validate_insn_64bits(uprobe, insn);
} }
#else #else /* 32-bit: */
static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe, static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe, struct insn *insn)
struct insn *insn)
{ {
return; /* No RIP-relative addressing on 32-bit */
} }
static int validate_insn_bits(struct mm_struct *mm, struct uprobe *uprobe, static int validate_insn_bits(struct mm_struct *mm, struct uprobe *uprobe, struct insn *insn)
struct insn *insn)
{ {
return validate_insn_32bits(uprobe, insn); return validate_insn_32bits(uprobe, insn);
} }
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
/** /**
* analyze_insn - instruction analysis including validity and fixups. * arch_uprobes_analyze_insn - instruction analysis including validity and fixups.
* @mm: the probed address space. * @mm: the probed address space.
* @uprobe: the probepoint information. * @uprobe: the probepoint information.
* Return 0 on success or a -ve number on error. * Return 0 on success or a -ve number on error.
*/ */
int analyze_insn(struct mm_struct *mm, struct uprobe *uprobe) int arch_uprobes_analyze_insn(struct mm_struct *mm, struct uprobe *uprobe)
{ {
int ret; int ret;
struct insn insn; struct insn insn;
...@@ -406,7 +411,9 @@ int analyze_insn(struct mm_struct *mm, struct uprobe *uprobe) ...@@ -406,7 +411,9 @@ int analyze_insn(struct mm_struct *mm, struct uprobe *uprobe)
ret = validate_insn_bits(mm, uprobe, &insn); ret = validate_insn_bits(mm, uprobe, &insn);
if (ret != 0) if (ret != 0)
return ret; return ret;
handle_riprel_insn(mm, uprobe, &insn); handle_riprel_insn(mm, uprobe, &insn);
prepare_fixups(uprobe, &insn); prepare_fixups(uprobe, &insn);
return 0; return 0;
} }
#ifndef _LINUX_UPROBES_H #ifndef _LINUX_UPROBES_H
#define _LINUX_UPROBES_H #define _LINUX_UPROBES_H
/* /*
* Userspace Probes (UProbes) * User-space Probes (UProbes)
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -40,8 +40,10 @@ struct uprobe_arch_info {}; ...@@ -40,8 +40,10 @@ struct uprobe_arch_info {};
#define uprobe_opcode_sz sizeof(uprobe_opcode_t) #define uprobe_opcode_sz sizeof(uprobe_opcode_t)
/* flags that denote/change uprobes behaviour */ /* flags that denote/change uprobes behaviour */
/* Have a copy of original instruction */ /* Have a copy of original instruction */
#define UPROBES_COPY_INSN 0x1 #define UPROBES_COPY_INSN 0x1
/* Dont run handlers when first register/ last unregister in progress*/ /* Dont run handlers when first register/ last unregister in progress*/
#define UPROBES_RUN_HANDLER 0x2 #define UPROBES_RUN_HANDLER 0x2
...@@ -70,27 +72,23 @@ struct uprobe { ...@@ -70,27 +72,23 @@ struct uprobe {
}; };
#ifdef CONFIG_UPROBES #ifdef CONFIG_UPROBES
extern int __weak set_bkpt(struct mm_struct *mm, struct uprobe *uprobe, extern int __weak set_bkpt(struct mm_struct *mm, struct uprobe *uprobe, unsigned long vaddr);
unsigned long vaddr); extern int __weak set_orig_insn(struct mm_struct *mm, struct uprobe *uprobe, unsigned long vaddr, bool verify);
extern int __weak set_orig_insn(struct mm_struct *mm, struct uprobe *uprobe,
unsigned long vaddr, bool verify);
extern bool __weak is_bkpt_insn(uprobe_opcode_t *insn); extern bool __weak is_bkpt_insn(uprobe_opcode_t *insn);
extern int register_uprobe(struct inode *inode, loff_t offset, extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *consumer);
struct uprobe_consumer *consumer); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *consumer);
extern void unregister_uprobe(struct inode *inode, loff_t offset, extern int uprobe_mmap(struct vm_area_struct *vma);
struct uprobe_consumer *consumer);
extern int mmap_uprobe(struct vm_area_struct *vma);
#else /* CONFIG_UPROBES is not defined */ #else /* CONFIG_UPROBES is not defined */
static inline int register_uprobe(struct inode *inode, loff_t offset, static inline int
struct uprobe_consumer *consumer) uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *consumer)
{ {
return -ENOSYS; return -ENOSYS;
} }
static inline void unregister_uprobe(struct inode *inode, loff_t offset, static inline void
struct uprobe_consumer *consumer) uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *consumer)
{ {
} }
static inline int mmap_uprobe(struct vm_area_struct *vma) static inline int uprobe_mmap(struct vm_area_struct *vma)
{ {
return 0; return 0;
} }
......
This diff is collapsed.
...@@ -618,10 +618,10 @@ again: remove_next = 1 + (end > next->vm_end); ...@@ -618,10 +618,10 @@ again: remove_next = 1 + (end > next->vm_end);
mutex_unlock(&mapping->i_mmap_mutex); mutex_unlock(&mapping->i_mmap_mutex);
if (root) { if (root) {
mmap_uprobe(vma); uprobe_mmap(vma);
if (adjust_next) if (adjust_next)
mmap_uprobe(next); uprobe_mmap(next);
} }
if (remove_next) { if (remove_next) {
...@@ -646,7 +646,7 @@ again: remove_next = 1 + (end > next->vm_end); ...@@ -646,7 +646,7 @@ again: remove_next = 1 + (end > next->vm_end);
} }
} }
if (insert && file) if (insert && file)
mmap_uprobe(insert); uprobe_mmap(insert);
validate_mm(mm); validate_mm(mm);
...@@ -1340,7 +1340,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, ...@@ -1340,7 +1340,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
make_pages_present(addr, addr + len); make_pages_present(addr, addr + len);
if (file && mmap_uprobe(vma)) if (file && uprobe_mmap(vma))
/* matching probes but cannot insert */ /* matching probes but cannot insert */
goto unmap_and_free_vma; goto unmap_and_free_vma;
...@@ -2301,7 +2301,7 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) ...@@ -2301,7 +2301,7 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
security_vm_enough_memory_mm(mm, vma_pages(vma))) security_vm_enough_memory_mm(mm, vma_pages(vma)))
return -ENOMEM; return -ENOMEM;
if (vma->vm_file && mmap_uprobe(vma)) if (vma->vm_file && uprobe_mmap(vma))
return -EINVAL; return -EINVAL;
vma_link(mm, vma, prev, rb_link, rb_parent); vma_link(mm, vma, prev, rb_link, rb_parent);
...@@ -2374,7 +2374,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, ...@@ -2374,7 +2374,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
if (new_vma->vm_file) { if (new_vma->vm_file) {
get_file(new_vma->vm_file); get_file(new_vma->vm_file);
if (mmap_uprobe(new_vma)) if (uprobe_mmap(new_vma))
goto out_free_mempol; goto out_free_mempol;
if (vma->vm_flags & VM_EXECUTABLE) if (vma->vm_flags & VM_EXECUTABLE)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment