Skip to content

Commit 17d94e2

Browse files
committed Sep 8, 2016
[XRay] ARM 32-bit no-Thumb support in LLVM
This is a port of XRay to ARM 32-bit, without Thumb support yet. The XRay instrumentation support is being moved up into AsmPrinter. This is one of three commits, each to a different repository, that together make up the XRay ARM port. The other two are: (1) https://reviews.llvm.org/D23932 (Clang test) and (2) https://reviews.llvm.org/D23933 (compiler-rt). Differential Revision: https://reviews.llvm.org/D23931. llvm-svn: 280888
1 parent 6b96c15 commit 17d94e2

17 files changed

+317
-62
lines changed
 

‎llvm/include/llvm/CodeGen/AsmPrinter.h

+28
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,34 @@ class AsmPrinter : public MachineFunctionPass {
187187

188188
MCSymbol *getSymbol(const GlobalValue *GV) const;
189189

190+
//===------------------------------------------------------------------===//
191+
// XRay instrumentation implementation.
192+
//===------------------------------------------------------------------===//
193+
public:
194+
// This describes the kind of sled we're storing in the XRay table.
195+
enum class SledKind : uint8_t {
196+
FUNCTION_ENTER = 0,
197+
FUNCTION_EXIT = 1,
198+
TAIL_CALL = 2,
199+
};
200+
201+
// The table will contain these structs that point to the sled, the function
202+
// containing the sled, and what kind of sled (and whether they should always
203+
// be instrumented).
204+
struct XRayFunctionEntry {
205+
const MCSymbol *Sled;
206+
const MCSymbol *Function;
207+
SledKind Kind;
208+
bool AlwaysInstrument;
209+
const class Function *Fn;
210+
};
211+
212+
// All the sleds to be emitted.
213+
std::vector<XRayFunctionEntry> Sleds;
214+
215+
// Helper function to record a given XRay sled.
216+
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
217+
190218
//===------------------------------------------------------------------===//
191219
// MachineFunctionPass Implementation.
192220
//===------------------------------------------------------------------===//

‎llvm/include/llvm/Target/Target.td

+9-1
Original file line numberDiff line numberDiff line change
@@ -956,11 +956,19 @@ def PATCHABLE_FUNCTION_ENTER : Instruction {
956956
def PATCHABLE_RET : Instruction {
957957
let OutOperandList = (outs unknown:$dst);
958958
let InOperandList = (ins variable_ops);
959-
let AsmString = "# XRay Function Exit.";
959+
let AsmString = "# XRay Function Patchable RET.";
960960
let usesCustomInserter = 1;
961961
let hasSideEffects = 1;
962962
let isReturn = 1;
963963
}
964+
def PATCHABLE_FUNCTION_EXIT : Instruction {
965+
let OutOperandList = (outs);
966+
let InOperandList = (ins);
967+
let AsmString = "# XRay Function Exit.";
968+
let usesCustomInserter = 1;
969+
let hasSideEffects = 0; // FIXME: is this correct?
970+
let isReturn = 0; // Original return instruction will follow
971+
}
964972
def PATCHABLE_TAIL_CALL : Instruction {
965973
let OutOperandList = (outs unknown:$dst);
966974
let InOperandList = (ins variable_ops);

‎llvm/include/llvm/Target/TargetOpcodes.def

+17
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,25 @@ HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER)
153153
/// Wraps a return instruction and its operands to enable adding nop sleds
154154
/// either before or after the return. The nop sleds are useful for inserting
155155
/// instrumentation instructions at runtime.
156+
/// The patch here replaces the return instruction.
156157
HANDLE_TARGET_OPCODE(PATCHABLE_RET)
157158

159+
/// This is a marker instruction which gets translated into a nop sled, useful
160+
/// for inserting instrumentation instructions at runtime.
161+
/// The patch here is prepended to the return instruction.
162+
/// The same thing as in x86_64 is not possible for ARM because it has multiple
163+
/// return instructions. Furthermore, the CPU allows parameterized and even
164+
/// conditional return instructions. In the current ARM implementation we are
165+
/// making use of the fact that currently LLVM doesn't seem to generate
166+
/// conditional return instructions.
167+
/// On ARM, the same instruction can be used for popping multiple registers
168+
/// from the stack and returning (it just pops pc register too), and LLVM
169+
/// generates it sometimes. So we can't insert the sled between this stack
170+
/// adjustment and the return without splitting the original instruction into 2
171+
/// instructions. So on ARM, rather than jumping into the exit trampoline, we
172+
/// call it; it does the tracing, preserves the stack and returns.
173+
HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)
174+
158175
/// Wraps a tail call instruction and its operands to enable adding nop sleds
159176
/// either before or after the tail exit. We use this as a disambiguation from
160177
/// PATCHABLE_RET which specifically only works for return instructions.

‎llvm/include/llvm/Target/TargetSubtargetInfo.h

+2
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
7171

7272
virtual ~TargetSubtargetInfo();
7373

74+
virtual bool isXRaySupported() const { return false; }
75+
7476
// Interfaces to the major aspects of target machine information:
7577
//
7678
// -- Instruction opcode and operand information

‎llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -2606,3 +2606,13 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
26062606
AsmPrinterHandler::~AsmPrinterHandler() {}
26072607

26082608
void AsmPrinterHandler::markFunctionEnd() {}
2609+
2610+
void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
2611+
SledKind Kind) {
2612+
auto Fn = MI.getParent()->getParent()->getFunction();
2613+
auto Attr = Fn->getFnAttribute("function-instrument");
2614+
bool AlwaysInstrument =
2615+
Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
2616+
Sleds.emplace_back(
2617+
XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
2618+
}

‎llvm/lib/CodeGen/XRayInstrumentation.cpp

+82-28
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,74 @@ struct XRayInstrumentation : public MachineFunctionPass {
3434
}
3535

3636
bool runOnMachineFunction(MachineFunction &MF) override;
37+
38+
private:
39+
// Replace the original RET instruction with the exit sled code ("patchable
40+
// ret" pseudo-instruction), so that at runtime XRay can replace the sled
41+
// with code jumping to the XRay trampoline, which calls the tracing handler
42+
// and, in the end, issues the RET instruction.
43+
// This is the approach to take on CPUs which have a single RET instruction,
44+
// like x86/x86_64.
45+
void replaceRetWithPatchableRet(MachineFunction &MF,
46+
const TargetInstrInfo *TII);
47+
// Prepend the original return instruction with the exit sled code ("patchable
48+
// function exit" pseudo-instruction), preserving the original return
49+
// instruction just after the exit sled code.
50+
// This is the approach to take on CPUs which have multiple options for the
51+
// return instruction, like ARM. For such CPUs we can't just jump into the
52+
// XRay trampoline and issue a single return instruction there. We rather
53+
// have to call the trampoline and return from it to the original return
54+
// instruction of the function being instrumented.
55+
void prependRetWithPatchableExit(MachineFunction &MF,
56+
const TargetInstrInfo *TII);
3757
};
58+
} // anonymous namespace
59+
60+
void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
61+
const TargetInstrInfo *TII)
62+
{
63+
// We look for *all* terminators and returns, then replace those with
64+
// PATCHABLE_RET instructions.
65+
SmallVector<MachineInstr *, 4> Terminators;
66+
for (auto &MBB : MF) {
67+
for (auto &T : MBB.terminators()) {
68+
unsigned Opc = 0;
69+
if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
70+
// Replace return instructions with:
71+
// PATCHABLE_RET <Opcode>, <Operand>...
72+
Opc = TargetOpcode::PATCHABLE_RET;
73+
}
74+
if (TII->isTailCall(T)) {
75+
// Treat the tail call as a return instruction, which has a
76+
// different-looking sled than the normal return case.
77+
Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
78+
}
79+
if (Opc != 0) {
80+
auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc))
81+
.addImm(T.getOpcode());
82+
for (auto &MO : T.operands())
83+
MIB.addOperand(MO);
84+
Terminators.push_back(&T);
85+
}
86+
}
87+
}
88+
89+
for (auto &I : Terminators)
90+
I->eraseFromParent();
91+
}
92+
93+
void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF,
94+
const TargetInstrInfo *TII)
95+
{
96+
for (auto &MBB : MF) {
97+
for (auto &T : MBB.terminators()) {
98+
if (T.isReturn()) {
99+
// Prepend the return instruction with PATCHABLE_FUNCTION_EXIT
100+
auto MIB = BuildMI(MBB, T, T.getDebugLoc(),
101+
TII->get(TargetOpcode::PATCHABLE_FUNCTION_EXIT));
102+
}
103+
}
104+
}
38105
}
39106

40107
bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
@@ -54,6 +121,11 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
54121
return false; // Function is too small.
55122
}
56123

124+
if (!MF.getSubtarget().isXRaySupported()) {
125+
// FIXME: can this be reported somehow?
126+
return false;
127+
}
128+
57129
// FIXME: Do the loop triviality analysis here or in an earlier pass.
58130

59131
// First, insert a PATCHABLE_FUNCTION_ENTER as the first instruction of the
@@ -64,35 +136,17 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
64136
BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
65137
TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
66138

67-
// Then we look for *all* terminators and returns, then replace those with
68-
// PATCHABLE_RET instructions.
69-
SmallVector<MachineInstr *, 4> Terminators;
70-
for (auto &MBB : MF) {
71-
for (auto &T : MBB.terminators()) {
72-
unsigned Opc = 0;
73-
if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
74-
// Replace return instructions with:
75-
// PATCHABLE_RET <Opcode>, <Operand>...
76-
Opc = TargetOpcode::PATCHABLE_RET;
77-
}
78-
if (TII->isTailCall(T)) {
79-
// Treat the tail call as a return instruction, which has a
80-
// different-looking sled than the normal return case.
81-
Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
82-
}
83-
if (Opc != 0) {
84-
auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc))
85-
.addImm(T.getOpcode());
86-
for (auto &MO : T.operands())
87-
MIB.addOperand(MO);
88-
Terminators.push_back(&T);
89-
}
90-
}
139+
switch (MF.getTarget().getTargetTriple().getArch()) {
140+
case Triple::ArchType::arm:
141+
// For the architectures which don't have a single return instruction
142+
prependRetWithPatchableExit(MF, TII);
143+
break;
144+
default:
145+
// For the architectures that have a single return instruction (such as
146+
// RETQ on x86_64).
147+
replaceRetWithPatchableRet(MF, TII);
148+
break;
91149
}
92-
93-
for (auto &I : Terminators)
94-
I->eraseFromParent();
95-
96150
return true;
97151
}
98152

‎llvm/lib/Target/ARM/ARMAsmPrinter.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
150150
// Emit the rest of the function body.
151151
EmitFunctionBody();
152152

153+
// Emit the XRay table for this function.
154+
EmitXRayTable();
155+
153156
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
154157
// These are created per function, rather than per TU, since it's
155158
// relatively easy to exceed the thumb branch range within a TU.
@@ -2005,6 +2008,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
20052008
.addReg(0));
20062009
return;
20072010
}
2011+
case ARM::PATCHABLE_FUNCTION_ENTER:
2012+
LowerPATCHABLE_FUNCTION_ENTER(*MI);
2013+
return;
2014+
case ARM::PATCHABLE_FUNCTION_EXIT:
2015+
LowerPATCHABLE_FUNCTION_EXIT(*MI);
2016+
return;
20082017
}
20092018

20102019
MCInst TmpInst;

‎llvm/lib/Target/ARM/ARMAsmPrinter.h

+12
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,19 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
9494
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
9595
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
9696

97+
//===------------------------------------------------------------------===//
98+
// XRay implementation
99+
//===------------------------------------------------------------------===//
100+
public:
101+
// XRay-specific lowering for ARM.
102+
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
103+
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
104+
// Helper function that emits the XRay sleds we've collected for a particular
105+
// function.
106+
void EmitXRayTable();
107+
97108
private:
109+
void EmitSled(const MachineInstr &MI, SledKind Kind);
98110

99111
// Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
100112
void emitAttributes();

‎llvm/lib/Target/ARM/ARMBaseInstrInfo.h

+4
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
100100
// Return whether the target has an explicit NOP encoding.
101101
bool hasNOP() const;
102102

103+
virtual void getNoopForElfTarget(MCInst &NopInst) const {
104+
getNoopForMachoTarget(NopInst);
105+
}
106+
103107
// Return the non-pre/post incrementing version of 'Opc'. Return 0
104108
// if there is not such an opcode.
105109
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;

‎llvm/lib/Target/ARM/ARMMCInstLower.cpp

+87
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@
2121
#include "llvm/IR/Mangler.h"
2222
#include "llvm/MC/MCExpr.h"
2323
#include "llvm/MC/MCInst.h"
24+
#include "llvm/MC/MCContext.h"
25+
#include "llvm/MC/MCSymbolELF.h"
26+
#include "llvm/MC/MCSectionELF.h"
27+
#include "llvm/MC/MCInstBuilder.h"
28+
#include "llvm/MC/MCStreamer.h"
2429
using namespace llvm;
2530

2631

@@ -150,3 +155,85 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
150155
}
151156
}
152157
}
158+
159+
void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
160+
{
161+
static const int8_t NoopsInSledCount = 6;
162+
// We want to emit the following pattern:
163+
//
164+
// ALIGN
165+
// .Lxray_sled_N:
166+
// B #20
167+
// ; 6 NOP instructions (24 bytes)
168+
// .tmpN
169+
//
170+
// We need the 24 bytes (6 instructions) because at runtime, we'd be patching
171+
// over the full 28 bytes (7 instructions) with the following pattern:
172+
//
173+
// PUSH{ r0, lr }
174+
// MOVW r0, #<lower 16 bits of function ID>
175+
// MOVT r0, #<higher 16 bits of function ID>
176+
// MOVW ip, #<lower 16 bits of address of __xray_FunctionEntry/Exit>
177+
// MOVT ip, #<higher 16 bits of address of __xray_FunctionEntry/Exit>
178+
// BLX ip
179+
// POP{ r0, lr }
180+
//
181+
OutStreamer->EmitCodeAlignment(4);
182+
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
183+
OutStreamer->EmitLabel(CurSled);
184+
auto Target = OutContext.createTempSymbol();
185+
186+
// Emit "B #20" instruction, which jumps over the next 24 bytes (because
187+
// register pc is 8 bytes ahead of the jump instruction by the time the CPU
188+
// is executing it).
189+
// By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|.
190+
// It is not clear why |addReg(0)| is needed (the last operand).
191+
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20)
192+
.addImm(ARMCC::AL).addReg(0));
193+
194+
MCInst Noop;
195+
Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
196+
for (int8_t I = 0; I < NoopsInSledCount; I++)
197+
{
198+
OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
199+
}
200+
201+
OutStreamer->EmitLabel(Target);
202+
recordSled(CurSled, MI, Kind);
203+
}
204+
205+
void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
206+
{
207+
EmitSled(MI, SledKind::FUNCTION_ENTER);
208+
}
209+
210+
void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI)
211+
{
212+
EmitSled(MI, SledKind::FUNCTION_EXIT);
213+
}
214+
215+
void ARMAsmPrinter::EmitXRayTable()
216+
{
217+
if (Sleds.empty())
218+
return;
219+
if (Subtarget->isTargetELF()) {
220+
auto *Section = OutContext.getELFSection(
221+
"xray_instr_map", ELF::SHT_PROGBITS,
222+
ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
223+
CurrentFnSym->getName());
224+
auto PrevSection = OutStreamer->getCurrentSectionOnly();
225+
OutStreamer->SwitchSection(Section);
226+
for (const auto &Sled : Sleds) {
227+
OutStreamer->EmitSymbolValue(Sled.Sled, 4);
228+
OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
229+
auto Kind = static_cast<uint8_t>(Sled.Kind);
230+
OutStreamer->EmitBytes(
231+
StringRef(reinterpret_cast<const char *>(&Kind), 1));
232+
OutStreamer->EmitBytes(
233+
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
234+
OutStreamer->EmitZeros(6);
235+
}
236+
OutStreamer->SwitchSection(PrevSection);
237+
}
238+
Sleds.clear();
239+
}

0 commit comments

Comments
 (0)
Please sign in to comment.