Index: bolt/include/bolt/Core/MCPlusBuilder.h =================================================================== --- bolt/include/bolt/Core/MCPlusBuilder.h +++ bolt/include/bolt/Core/MCPlusBuilder.h @@ -498,9 +498,9 @@ } /// Create increment contents of target by 1 for Instrumentation - virtual InstructionListType createInstrIncMemory(const MCSymbol *Target, - MCContext *Ctx, - bool IsLeaf) const { + virtual InstructionListType + createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf, + unsigned CodePointerSize) const { llvm_unreachable("not implemented"); return InstructionListType(); } @@ -1597,18 +1597,11 @@ return false; } - virtual void createLoadImmediate(MCInst &Inst, const MCPhysReg Dest, - uint32_t Imm) const { + virtual InstructionListType createLoadImmediate(const MCPhysReg Dest, + uint64_t Imm) const { llvm_unreachable("not implemented"); } - /// Create instruction to increment contents of target by 1 - virtual bool createIncMemory(MCInst &Inst, const MCSymbol *Target, - MCContext *Ctx) const { - llvm_unreachable("not implemented"); - return false; - } - /// Create a fragment of code (sequence of instructions) that load a 32-bit /// address from memory, zero-extends it to 64 and jump to it (indirect jump). virtual bool @@ -1969,7 +1962,7 @@ } virtual InstructionListType createSymbolTrampoline(const MCSymbol *TgtSym, - MCContext *Ctx) const { + MCContext *Ctx) { llvm_unreachable("not implemented"); return InstructionListType(); } Index: bolt/lib/Passes/Instrumentation.cpp =================================================================== --- bolt/lib/Passes/Instrumentation.cpp +++ bolt/lib/Passes/Instrumentation.cpp @@ -176,7 +176,8 @@ auto L = BC.scopeLock(); MCSymbol *Label = BC.Ctx->createNamedTempSymbol("InstrEntry"); Summary->Counters.emplace_back(Label); - return BC.MIB->createInstrIncMemory(Label, BC.Ctx.get(), IsLeaf); + return BC.MIB->createInstrIncMemory(Label, BC.Ctx.get(), IsLeaf, + BC.AsmInfo->getCodePointerSize()); } // Helper instruction sequence insertion function @@ -504,9 +505,6 @@ } void Instrumentation::runOnFunctions(BinaryContext &BC) { - if (!BC.isX86()) - return; - const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/false, /*IsText=*/false, /*IsAllocatable=*/true); Index: bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp =================================================================== --- bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -16,6 +16,8 @@ #include "Utils/AArch64BaseInfo.h" #include "bolt/Core/MCPlusBuilder.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" @@ -28,6 +30,100 @@ namespace { +static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) { + Inst.setOpcode(AArch64::MRS); + Inst.clear(); + Inst.addOperand(MCOperand::createReg(RegName)); + Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV)); +} + +static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) { + Inst.setOpcode(AArch64::MSR); + Inst.clear(); + Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV)); + Inst.addOperand(MCOperand::createReg(RegName)); +} + +static void createPushRegisters(MCInst &Inst, MCPhysReg Reg1, MCPhysReg Reg2) { + Inst.clear(); + unsigned NewOpcode = AArch64::STPXpre; + Inst.setOpcode(NewOpcode); + Inst.addOperand(MCOperand::createReg(AArch64::SP)); + Inst.addOperand(MCOperand::createReg(Reg1)); + Inst.addOperand(MCOperand::createReg(Reg2)); + Inst.addOperand(MCOperand::createReg(AArch64::SP)); + Inst.addOperand(MCOperand::createImm(-2)); +} + +static void createPopRegisters(MCInst &Inst, MCPhysReg Reg1, MCPhysReg Reg2) { + Inst.clear(); + unsigned NewOpcode = AArch64::LDPXpost; + Inst.setOpcode(NewOpcode); + Inst.addOperand(MCOperand::createReg(AArch64::SP)); + Inst.addOperand(MCOperand::createReg(Reg1)); + Inst.addOperand(MCOperand::createReg(Reg2)); + Inst.addOperand(MCOperand::createReg(AArch64::SP)); + Inst.addOperand(MCOperand::createImm(2)); +} + +static void loadReg(MCInst &Inst, MCPhysReg To, MCPhysReg From) { + Inst.setOpcode(AArch64::LDRXui); + Inst.clear(); + if (From == AArch64::SP) { + Inst.setOpcode(AArch64::LDRXpost); + Inst.addOperand(MCOperand::createReg(From)); + Inst.addOperand(MCOperand::createReg(To)); + Inst.addOperand(MCOperand::createReg(From)); + Inst.addOperand(MCOperand::createImm(16)); + } else { + Inst.addOperand(MCOperand::createReg(To)); + Inst.addOperand(MCOperand::createReg(From)); + Inst.addOperand(MCOperand::createImm(0)); + } +} + +static void storeReg(MCInst &Inst, MCPhysReg From, MCPhysReg To) { + Inst.setOpcode(AArch64::STRXui); + Inst.clear(); + if (To == AArch64::SP) { + Inst.setOpcode(AArch64::STRXpre); + Inst.addOperand(MCOperand::createReg(To)); + Inst.addOperand(MCOperand::createReg(From)); + Inst.addOperand(MCOperand::createReg(To)); + Inst.addOperand(MCOperand::createImm(-16)); + } else { + Inst.addOperand(MCOperand::createReg(From)); + Inst.addOperand(MCOperand::createReg(To)); + Inst.addOperand(MCOperand::createImm(0)); + } +} + +static void atomicAdd(MCInst &Inst, MCPhysReg RegTo, MCPhysReg RegCnt) { + // NOTE: Supports only ARM with LSE extension + Inst.setOpcode(AArch64::LDADDX); + Inst.clear(); + Inst.addOperand(MCOperand::createReg(AArch64::XZR)); + Inst.addOperand(MCOperand::createReg(RegCnt)); + Inst.addOperand(MCOperand::createReg(RegTo)); +} + +static void createMovz(MCInst &Inst, MCPhysReg Reg, uint64_t Imm) { + assert(Imm <= UINT16_MAX && "Invalid Imm size"); + Inst.clear(); + Inst.setOpcode(AArch64::MOVZXi); + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createImm(Imm & 0xFFFF)); + Inst.addOperand(MCOperand::createImm(0)); +} + +static InstructionListType createIncMemory(MCPhysReg RegTo, MCPhysReg RegTmp) { + InstructionListType Insts; + Insts.emplace_back(); + createMovz(Insts.back(), RegTmp, 1); + Insts.emplace_back(); + atomicAdd(Insts.back(), RegTo, RegTmp); + return Insts; +} class AArch64MCPlusBuilder : public MCPlusBuilder { public: AArch64MCPlusBuilder(const MCInstrAnalysis *Analysis, const MCInstrInfo *Info, @@ -205,6 +301,40 @@ return Inst.getOpcode() == AArch64::BLR; } + MCPhysReg getSpRegister(int Size) const { + switch (Size) { + case 4: + return AArch64::WSP; + case 8: + return AArch64::SP; + default: + llvm_unreachable("Unexpected size"); + } + } + + MCPhysReg getIntArgRegister(unsigned ArgNo) const override { + switch (ArgNo) { + case 0: + return AArch64::X0; + case 1: + return AArch64::X1; + case 2: + return AArch64::X2; + case 3: + return AArch64::X3; + case 4: + return AArch64::X4; + case 5: + return AArch64::X5; + case 6: + return AArch64::X6; + case 7: + return AArch64::X7; + default: + return getNoRegister(); + } + } + bool hasPCRelOperand(const MCInst &Inst) const override { // ADRP is blacklisted and is an exception. Even though it has a // PC-relative operand, this operand is not a complete symbol reference @@ -816,14 +946,25 @@ int getUncondBranchEncodingSize() const override { return 28; } + InstructionListType createCmpJE(MCPhysReg RegNo, int64_t Imm, + const MCSymbol *Target, + MCContext *Ctx) const override { + InstructionListType Code; + Code.emplace_back(MCInstBuilder(AArch64::SUBSXri) + .addReg(RegNo) + .addReg(RegNo) + .addImm(Imm) + .addImm(0)); + Code.emplace_back(MCInstBuilder(AArch64::Bcc) + .addImm(Imm) + .addExpr(MCSymbolRefExpr::create( + Target, MCSymbolRefExpr::VK_None, *Ctx))); + return Code; + } + bool createTailCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx) override { - Inst.setOpcode(AArch64::B); - Inst.addOperand(MCOperand::createExpr(getTargetExprFor( - Inst, MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), - *Ctx, 0))); - setTailCall(Inst); - return true; + return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true); } void createLongTailCall(InstructionListType &Seq, const MCSymbol *Target, @@ -872,6 +1013,18 @@ bool isStore(const MCInst &Inst) const override { return false; } + bool createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx, + bool IsTailCall) override { + Inst.setOpcode(IsTailCall ? AArch64::B : AArch64::BL); + Inst.clear(); + Inst.addOperand(MCOperand::createExpr(getTargetExprFor( + Inst, MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx), + *Ctx, 0))); + if (IsTailCall) + convertJmpToTailCall(Inst); + return true; + } + bool analyzeBranch(InstructionIterator Begin, InstructionIterator End, const MCSymbol *&TBB, const MCSymbol *&FBB, MCInst *&CondBranch, @@ -1139,6 +1292,216 @@ return true; } + bool createStackPointerIncrement( + MCInst &Inst, int Size, + bool NoFlagsClobber = false /*unused for AArch64*/) const override { + Inst.setOpcode(AArch64::SUBXri); + Inst.clear(); + Inst.addOperand(MCOperand::createReg(AArch64::SP)); + Inst.addOperand(MCOperand::createReg(AArch64::SP)); + Inst.addOperand(MCOperand::createImm(Size)); + Inst.addOperand(MCOperand::createImm(0)); + return true; + } + + bool createStackPointerDecrement( + MCInst &Inst, int Size, + bool NoFlagsClobber = false /*unused for AArch64*/) const override { + Inst.setOpcode(AArch64::ADDXri); + Inst.clear(); + Inst.addOperand(MCOperand::createReg(AArch64::SP)); + Inst.addOperand(MCOperand::createReg(AArch64::SP)); + Inst.addOperand(MCOperand::createImm(Size)); + Inst.addOperand(MCOperand::createImm(0)); + return true; + } + + void createIndirectBranch(MCInst &Inst, MCPhysReg MemBaseReg, + int64_t Disp) const { + Inst.setOpcode(AArch64::BR); + Inst.addOperand(MCOperand::createReg(MemBaseReg)); + } + + InstructionListType createInstrumentedIndCallHandlerExitBB() const override { + InstructionListType Insts(5); + // Code sequence for instrumented indirect call handler: + // msr nzcv, x1 + // ldp x0, x1, [sp], #16 + // ldr x16, [sp], #16 + // ldp x0, x1, [sp], #16 + // br x16 + setSystemFlag(Insts[0], AArch64::X1); + createPopRegisters(Insts[1], AArch64::X0, AArch64::X1); + // Here we load address of the next function which should be called in the + // original binary to X16 register. Writing to X16 is permitted without + // needing to restore. + loadReg(Insts[2], AArch64::X16, AArch64::SP); + createPopRegisters(Insts[3], AArch64::X0, AArch64::X1); + createIndirectBranch(Insts[4], AArch64::X16, 0); + return Insts; + } + + InstructionListType + createInstrumentedIndTailCallHandlerExitBB() const override { + return createInstrumentedIndCallHandlerExitBB(); + } + + void convertIndirectCallToLoad(MCInst &Inst, MCPhysReg Reg) override { + bool IsTailCall = isTailCall(Inst); + if (IsTailCall) + removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall); + if (Inst.getOpcode() == AArch64::BL || Inst.getOpcode() == AArch64::BLR || + Inst.getOpcode() == AArch64::BR || Inst.getOpcode() == AArch64::B) { + + Inst.setOpcode(AArch64::ORRXrs); + Inst.insert(Inst.begin(), MCOperand::createReg(Reg)); + Inst.insert(Inst.begin() + 1, MCOperand::createReg(AArch64::XZR)); + Inst.insert(Inst.begin() + 3, MCOperand::createImm(0)); + return; + } + llvm_unreachable("not implemented"); + } + + InstructionListType createLoadImmediate(const MCPhysReg Dest, + uint64_t Imm) const override { + InstructionListType Insts(4); + int Shift = 48; + for (int I = 0; I < 4; I++, Shift -= 16) { + Insts[I].setOpcode(AArch64::MOVKXi); + Insts[I].addOperand(MCOperand::createReg(Dest)); + Insts[I].addOperand(MCOperand::createReg(Dest)); + Insts[I].addOperand(MCOperand::createImm((Imm >> Shift) & 0xFFFF)); + Insts[I].addOperand(MCOperand::createImm(Shift)); + } + return Insts; + } + + void createIndirectCallInst(MCInst &Inst, bool IsTailCall, + MCPhysReg Reg) const { + Inst.clear(); + Inst.setOpcode(IsTailCall ? AArch64::BR : AArch64::BLR); + Inst.addOperand(MCOperand::createReg(Reg)); + } + + InstructionListType createInstrumentedIndirectCall(MCInst &&CallInst, + MCSymbol *HandlerFuncAddr, + int CallSiteID, + MCContext *Ctx) override { + InstructionListType Insts; + // Code sequence used to enter indirect call instrumentation helper: + // stp x0, x1, [sp, #-16]! createPushRegisters + // mov target x0 convertIndirectCallToLoad -> orr x0 target xzr + // mov x1 CallSiteID createLoadImmediate -> + // movk x1, #0x0, lsl #48 + // movk x1, #0x0, lsl #32 + // movk x1, #0x0, lsl #16 + // movk x1, #0x0 + // stp x0, x1, [sp, #-16]! + // bl *HandlerFuncAddr createIndirectCall -> + // adr x0 *HandlerFuncAddr -> adrp + add + // blr x0 + Insts.emplace_back(); + createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1); + Insts.emplace_back(CallInst); + convertIndirectCallToLoad(Insts.back(), AArch64::X0); + InstructionListType LoadImm = + createLoadImmediate(getIntArgRegister(1), CallSiteID); + Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end()); + Insts.emplace_back(); + createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1); + Insts.resize(Insts.size() + 2); + InstructionListType Addr = + materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0); + assert(Addr.size() == 2 && "Invalid Addr size"); + std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size()); + Insts.emplace_back(); + createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0); + + // Carry over metadata including tail call marker if present. + stripAnnotations(Insts.back()); + moveAnnotations(std::move(CallInst), Insts.back()); + + return Insts; + } + + InstructionListType + createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline, + const MCSymbol *IndCallHandler, + MCContext *Ctx) override { + // Code sequence used to check whether InstrTampoline was initialized + // and call it if so, returns via IndCallHandler + // stp x0, x1, [sp, #-16]! + // mrs x1, nzcv + // adr x0, InstrTrampoline -> adrp + add + // ldr x0, [x0] + // subs x0, x0, #0x0 + // b.eq IndCallHandler + // str x30, [sp, #-16]! + // blr x0 + // ldr x30, [sp], #16 + // b IndCallHandler + InstructionListType Insts; + Insts.emplace_back(); + createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1); + Insts.emplace_back(); + getSystemFlag(Insts.back(), getIntArgRegister(1)); + Insts.emplace_back(); + Insts.emplace_back(); + InstructionListType Addr = + materializeAddress(InstrTrampoline, Ctx, AArch64::X0); + std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size()); + assert(Addr.size() == 2 && "Invalid Addr size"); + Insts.emplace_back(); + loadReg(Insts.back(), AArch64::X0, AArch64::X0); + InstructionListType cmpJmp = + createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx); + Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end()); + Insts.emplace_back(); + storeReg(Insts.back(), AArch64::LR, AArch64::SP); + Insts.emplace_back(); + Insts.back().setOpcode(AArch64::BLR); + Insts.back().addOperand(MCOperand::createReg(AArch64::X0)); + Insts.emplace_back(); + loadReg(Insts.back(), AArch64::LR, AArch64::SP); + Insts.emplace_back(); + createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true); + return Insts; + } + + InstructionListType + createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf, + unsigned CodePointerSize) const override { + unsigned int I = 0; + InstructionListType Instrs(IsLeaf ? 12 : 10); + + if (IsLeaf) + createStackPointerIncrement(Instrs[I++], 128); + createPushRegisters(Instrs[I++], AArch64::X0, AArch64::X1); + getSystemFlag(Instrs[I++], AArch64::X1); + InstructionListType Addr = materializeAddress(Target, Ctx, AArch64::X0); + assert(Addr.size() == 2 && "Invalid Addr size"); + std::copy(Addr.begin(), Addr.end(), Instrs.begin() + I); + I += Addr.size(); + storeReg(Instrs[I++], AArch64::X2, AArch64::SP); + InstructionListType Insts = createIncMemory(AArch64::X0, AArch64::X2); + assert(Insts.size() == 2 && "Invalid Insts size"); + std::copy(Insts.begin(), Insts.end(), Instrs.begin() + I); + I += Insts.size(); + loadReg(Instrs[I++], AArch64::X2, AArch64::SP); + setSystemFlag(Instrs[I++], AArch64::X1); + createPopRegisters(Instrs[I++], AArch64::X0, AArch64::X1); + if (IsLeaf) + createStackPointerDecrement(Instrs[I++], 128); + return Instrs; + } + + std::vector createSymbolTrampoline(const MCSymbol *TgtSym, + MCContext *Ctx) override { + std::vector Insts; + createShortJmp(Insts, TgtSym, Ctx, /*IsTailCall*/ true); + return Insts; + } + InstructionListType materializeAddress(const MCSymbol *Target, MCContext *Ctx, MCPhysReg RegName, int64_t Addend = 0) const override { Index: bolt/lib/Target/X86/X86MCPlusBuilder.cpp =================================================================== --- bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -61,6 +61,25 @@ Inst.getOpcode() == X86::ADD64ri8; } +// Create instruction to increment contents of target by 1 +static InstructionListType createIncMemory(const MCSymbol *Target, + MCContext *Ctx) { + InstructionListType Insts; + Insts.emplace_back(); + Insts.back().setOpcode(X86::LOCK_INC64m); + Insts.back().clear(); + Insts.back().addOperand(MCOperand::createReg(X86::RIP)); // BaseReg + Insts.back().addOperand(MCOperand::createImm(1)); // ScaleAmt + Insts.back().addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg + + Insts.back().addOperand(MCOperand::createExpr( + MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, + *Ctx))); // Displacement + Insts.back().addOperand( + MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg + return Insts; +} + #define GET_INSTRINFO_OPERAND_TYPES_ENUM #define GET_INSTRINFO_OPERAND_TYPE #define GET_INSTRINFO_MEM_OPERAND_SIZE @@ -2309,28 +2328,15 @@ return true; } - void createLoadImmediate(MCInst &Inst, const MCPhysReg Dest, - uint32_t Imm) const override { - Inst.setOpcode(X86::MOV64ri32); - Inst.clear(); - Inst.addOperand(MCOperand::createReg(Dest)); - Inst.addOperand(MCOperand::createImm(Imm)); - } - - bool createIncMemory(MCInst &Inst, const MCSymbol *Target, - MCContext *Ctx) const override { - - Inst.setOpcode(X86::LOCK_INC64m); - Inst.clear(); - Inst.addOperand(MCOperand::createReg(X86::RIP)); // BaseReg - Inst.addOperand(MCOperand::createImm(1)); // ScaleAmt - Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg - - Inst.addOperand(MCOperand::createExpr( - MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, - *Ctx))); // Displacement - Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg - return true; + InstructionListType createLoadImmediate(const MCPhysReg Dest, + uint64_t Imm) const override { + InstructionListType Insts; + Insts.emplace_back(); + Insts.back().setOpcode(X86::MOV64ri32); + Insts.back().clear(); + Insts.back().addOperand(MCOperand::createReg(Dest)); + Insts.back().addOperand(MCOperand::createImm(Imm)); + return Insts; } bool createIJmp32Frag(SmallVectorImpl &Insts, @@ -3057,9 +3063,9 @@ Inst.clear(); } - InstructionListType createInstrIncMemory(const MCSymbol *Target, - MCContext *Ctx, - bool IsLeaf) const override { + InstructionListType + createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf, + unsigned CodePointerSize) const override { InstructionListType Instrs(IsLeaf ? 13 : 11); unsigned int I = 0; @@ -3079,7 +3085,10 @@ createClearRegWithNoEFlagsUpdate(Instrs[I++], X86::RAX, 8); createX86SaveOVFlagToRegister(Instrs[I++], X86::AL); // LOCK INC - createIncMemory(Instrs[I++], Target, Ctx); + InstructionListType IncMem = createIncMemory(Target, Ctx); + assert(IncMem.size() == 1 && "Invalid IncMem size"); + std::copy(IncMem.begin(), IncMem.end(), Instrs.begin() + I); + I += IncMem.size(); // POPF createAddRegImm(Instrs[I++], X86::AL, 127, 1); createPopRegister(Instrs[I++], X86::RAX, 8); @@ -3153,8 +3162,8 @@ } Insts.emplace_back(); createPushRegister(Insts.back(), TempReg, 8); - Insts.emplace_back(); - createLoadImmediate(Insts.back(), TempReg, CallSiteID); + InstructionListType LoadImm = createLoadImmediate(TempReg, CallSiteID); + Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end()); Insts.emplace_back(); createPushRegister(Insts.back(), TempReg, 8); @@ -3264,7 +3273,7 @@ } InstructionListType createSymbolTrampoline(const MCSymbol *TgtSym, - MCContext *Ctx) const override { + MCContext *Ctx) override { InstructionListType Insts(1); createUncondBranch(Insts[0], TgtSym, Ctx); return Insts;