Commit d769811c authored by Adrian Hunter's avatar Adrian Hunter Committed by Peter Zijlstra

perf/x86: Add support for perf text poke event for text_poke_bp_batch() callers

Add support for perf text poke event for text_poke_bp_batch() callers. That
includes jump labels. See comments for more details.
Signed-off-by: default avatarAdrian Hunter <adrian.hunter@intel.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200512121922.8997-3-adrian.hunter@intel.com
parent e17d43b9
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/stringify.h> #include <linux/stringify.h>
...@@ -1001,6 +1002,7 @@ struct text_poke_loc { ...@@ -1001,6 +1002,7 @@ struct text_poke_loc {
s32 rel32; s32 rel32;
u8 opcode; u8 opcode;
const u8 text[POKE_MAX_OPCODE_SIZE]; const u8 text[POKE_MAX_OPCODE_SIZE];
u8 old;
}; };
struct bp_patching_desc { struct bp_patching_desc {
...@@ -1168,8 +1170,10 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries ...@@ -1168,8 +1170,10 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
/* /*
* First step: add a int3 trap to the address that will be patched. * First step: add a int3 trap to the address that will be patched.
*/ */
for (i = 0; i < nr_entries; i++) for (i = 0; i < nr_entries; i++) {
tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE); text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
}
text_poke_sync(); text_poke_sync();
...@@ -1177,14 +1181,45 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries ...@@ -1177,14 +1181,45 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
* Second step: update all but the first byte of the patched range. * Second step: update all but the first byte of the patched range.
*/ */
for (do_sync = 0, i = 0; i < nr_entries; i++) { for (do_sync = 0, i = 0; i < nr_entries; i++) {
u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
int len = text_opcode_size(tp[i].opcode); int len = text_opcode_size(tp[i].opcode);
if (len - INT3_INSN_SIZE > 0) { if (len - INT3_INSN_SIZE > 0) {
memcpy(old + INT3_INSN_SIZE,
text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
len - INT3_INSN_SIZE);
text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE, text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
(const char *)tp[i].text + INT3_INSN_SIZE, (const char *)tp[i].text + INT3_INSN_SIZE,
len - INT3_INSN_SIZE); len - INT3_INSN_SIZE);
do_sync++; do_sync++;
} }
/*
* Emit a perf event to record the text poke, primarily to
* support Intel PT decoding which must walk the executable code
* to reconstruct the trace. The flow up to here is:
* - write INT3 byte
* - IPI-SYNC
* - write instruction tail
* At this point the actual control flow will be through the
* INT3 and handler and not hit the old or new instruction.
* Intel PT outputs FUP/TIP packets for the INT3, so the flow
* can still be decoded. Subsequently:
* - emit RECORD_TEXT_POKE with the new instruction
* - IPI-SYNC
* - write first byte
* - IPI-SYNC
* So before the text poke event timestamp, the decoder will see
* either the old instruction flow or FUP/TIP of INT3. After the
* text poke event timestamp, the decoder will see either the
* new instruction flow or FUP/TIP of INT3. Thus decoders can
* use the timestamp as the point at which to modify the
* executable code.
* The old instruction is recorded so that the event can be
* processed forwards or backwards.
*/
perf_event_text_poke(text_poke_addr(&tp[i]), old, len,
tp[i].text, len);
} }
if (do_sync) { if (do_sync) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment