diff --git a/bolt/CODE_OWNERS.TXT b/bolt/CODE_OWNERS.TXT
--- a/bolt/CODE_OWNERS.TXT
+++ b/bolt/CODE_OWNERS.TXT
@@ -19,4 +19,4 @@
 N: Vladislav Khmelevsky
 E: och95@yandex.ru
-D: AArch64 backend
+D: AArch64 backend, Golang support
diff --git a/bolt/README.md b/bolt/README.md
--- a/bolt/README.md
+++ b/bolt/README.md
@@ -25,6 +25,12 @@
 transfer (e.g., jump tables), the code patterns should be matching those
 generated by Clang/GCC.
 
+BOLT supports golang binaries built with the gc compiler using the
+`-golang=version` option, where `version` is the go compiler version that was
+used to build the binary, or `1` to determine the version automatically from
+the input file. Since golang support strictly depends on the compiler version,
+BOLT won't be able to optimize binaries built with currently unsupported versions.
+
 NOTE: BOLT is currently incompatible with the `-freorder-blocks-and-partition`
 compiler option. Since GCC8 enables this option by default, you have to
 explicitly disable it by adding `-fno-reorder-blocks-and-partition` flag if
diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h
--- a/bolt/include/bolt/Core/BinaryBasicBlock.h
+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h
@@ -688,9 +688,14 @@
     return Instructions.erase(II);
   }
 
+  /// Store the meta-data of the instruction at \p II in a NOP instruction
+  /// that is inserted in place of the erased one.
+  iterator storeInstructionMetadata(iterator II);
+
   /// Erase non-pseudo instruction at a given iterator \p II.
   /// Return iterator following the removed instruction.
   iterator eraseInstruction(iterator II) {
+    II = storeInstructionMetadata(II);
     adjustNumPseudos(*II, -1);
     return Instructions.erase(II);
   }
@@ -729,12 +734,9 @@
   /// Return iterator pointing to the first inserted instruction.
   template <typename Itr>
   iterator replaceInstruction(iterator II, Itr Begin, Itr End) {
-    adjustNumPseudos(*II, -1);
+    II = eraseInstruction(II);
     adjustNumPseudos(Begin, End, 1);
-
-    auto I = II - Instructions.begin();
-    Instructions.insert(Instructions.erase(II), Begin, End);
-    return I + Instructions.begin();
+    return Instructions.insert(II, Begin, End);
   }
 
   iterator replaceInstruction(iterator II,
@@ -754,6 +756,14 @@
     return Instructions.insert(At, NewInst);
   }
 
+  iterator insertInstructions(iterator At, std::vector<MCInst> &Instrs) {
+    for (MCInst &NewInst : Instrs) {
+      At = insertInstruction(At, NewInst);
+      ++At;
+    }
+    return At;
+  }
+
   /// Helper to retrieve any terminators in \p BB before \p Pos. This is used
   /// to skip CFI instructions and to retrieve the first terminator instruction
   /// in basic blocks with two terminators (conditional jump and unconditional
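A quick usage sketch for the new insertInstructions() helper, not part of the
patch itself; `BB` and `BC` are assumed to be an existing BinaryBasicBlock and
its BinaryContext. The returned iterator points just past the inserted range:

  std::vector<MCInst> Seq(2);
  BC.MIB->createNoop(Seq[0]);
  BC.MIB->createNoop(Seq[1]);
  // Insert both instructions at the start of BB; It ends up pointing
  // past the inserted sequence, ready for further insertions.
  BinaryBasicBlock::iterator It = BB.insertInstructions(BB.begin(), Seq);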
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -19,6 +19,7 @@
 #include "bolt/Core/JumpTable.h"
 #include "bolt/Core/MCPlusBuilder.h"
 #include "bolt/RuntimeLibs/RuntimeLibrary.h"
+#include "bolt/Utils/NameResolver.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Triple.h"
@@ -211,6 +212,8 @@
   std::map<uint64_t, DwarfLineTable> DwarfLineTablesCUMap;
 
 public:
+  std::vector<std::string> ExtraSectionsNames;
+
   static Expected<std::unique_ptr<BinaryContext>>
   createBinaryContext(const ObjectFile *File, bool IsPIC,
                       std::unique_ptr<DWARFContext> DwCtx);
@@ -429,6 +432,20 @@
     return BinaryFunctions;
   }
 
  /// Return a BF by \p Name. If a global function with that name was not
  /// found, return the first local one.
+  BinaryFunction *getBinaryFunctionByName(StringRef Name) {
+    BinaryData *Data = getFirstBinaryDataByName(Name);
+    if (!Data)
+      return nullptr;
+
+    return getBinaryFunctionAtAddress(Data->getAddress());
+  }
+
+  const BinaryFunction *getBinaryFunctionByName(StringRef Name) const {
+    return const_cast<BinaryContext *>(this)->getBinaryFunctionByName(Name);
+  }
+
   /// Create BOLT-injected function
   BinaryFunction *createInjectedBinaryFunction(const std::string &Name,
                                                bool IsSimple = true);
@@ -790,6 +807,19 @@
     return nullptr;
   }
 
+  /// Return the BinaryData for the given \p Name.
+  /// If no global entry was found, return the first local BinaryData or nullptr.
+  BinaryData *getFirstBinaryDataByName(StringRef Name) {
+    BinaryData *Data = getBinaryDataByName(Name);
+    if (!Data)
+      return getBinaryDataByName(NameResolver::uniquifyID(Name, 1));
+    return Data;
+  }
+
+  const BinaryData *getFirstBinaryDataByName(StringRef Name) const {
+    return const_cast<BinaryContext *>(this)->getFirstBinaryDataByName(Name);
+  }
+
   /// Return true if \p SymbolName was generated internally and was not present
   /// in the input binary.
   bool isInternalSymbolName(const StringRef Name) {
@@ -888,6 +918,15 @@
                                        uint64_t Size = 0,
                                        unsigned Alignment = 1);
 
+  /// Wrapper over registerOrUpdateSection that also adds the section name
+  /// to the ExtraSectionsNames container. Used for sections that are
+  /// created from scratch (infra sections).
+  BinarySection &registerExtraSection(StringRef Name, unsigned ELFType,
+                                      unsigned ELFFlags,
+                                      uint8_t *Data = nullptr,
+                                      uint64_t Size = 0,
+                                      unsigned Alignment = 1);
+
   /// Register the information for the note (non-allocatable) section
   /// with the given /p Name. If the section already exists, the
   /// information in the section will be updated with the new data.
@@ -1238,6 +1277,10 @@
                         /* DWARFMustBeAtTheEnd */ false));
     return Streamer;
   }
+
+  /// Return a pair of the binary function's desired and maximum alignment bytes.
+  std::pair<uint16_t, uint16_t> getBFAlignment(BinaryFunction &Function,
+                                               bool EmitColdPart) const;
 };
 
 template >
diff --git a/bolt/include/bolt/Core/BinaryData.h b/bolt/include/bolt/Core/BinaryData.h
--- a/bolt/include/bolt/Core/BinaryData.h
+++ b/bolt/include/bolt/Core/BinaryData.h
@@ -53,6 +53,8 @@
   uint64_t Address{0};
   /// Size of this data (can be 0).
   uint64_t Size{0};
+  /// Output size of this data (if changed).
+  uint64_t OutputSize{0};
   /// Alignment of this data.
   uint16_t Alignment{1};
@@ -133,7 +135,8 @@
   StringRef getOutputSectionName() const;
   uint64_t getOutputAddress() const;
   uint64_t getOutputOffset() const { return OutputOffset; }
-  uint64_t getOutputSize() const { return Size; }
+  uint64_t getOutputSize() const { return OutputSize ? OutputSize : Size; }
+  void setOutputSize(uint64_t NewSize) { OutputSize = NewSize; }
   bool isMoved() const;
   bool containsAddress(uint64_t Address) const {
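Go runtime symbols are frequently local-only in the input binary, so the
lookups above fall back to the NameResolver-uniquified local name. A usage
sketch under that assumption (the symbol name is illustrative):

  // Resolve "runtime.pclntab" whether it is global or local-only; as the
  // lookup above suggests, the local fallback is the name uniquified with
  // ID 1.
  if (BinaryData *BD = BC.getFirstBinaryDataByName("runtime.pclntab")) {
    uint64_t PclntabAddr = BD->getAddress();
    (void)PclntabAddr; // e.g. feed into Pclntab::readHeader()
  }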
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -68,6 +68,14 @@
   ICP_ALL          /// Perform ICP on calls and jump tables.
 };
 
+enum IndexOrder {
+  GO_FIRST_BF_INDEX = 0,
+  FIRST_BF_INDEX = 1,
+  GO_UNUSED_BF_INDEX = -3U,
+  GO_LAST_BF_INDEX = -2U,
+  INVALID_BF_INDEX = -1U,
+};
+
 /// Information on a single indirect call to a particular callee.
 struct IndirectCallProfile {
   MCSymbol *Symbol;
@@ -200,6 +208,8 @@
   using cfi_iterator = CFIInstrMapType::iterator;
   using const_cfi_iterator = CFIInstrMapType::const_iterator;
 
+  // If the function is golang, stores the offset of its functab entry
+  uint32_t GolangFunctabOffset = 0;
 private:
   /// Current state of the function.
   State CurrentState{State::Empty};
@@ -238,7 +248,7 @@
   uint64_t MaxSize{std::numeric_limits<uint64_t>::max()};
 
   /// Alignment requirements for the function.
-  uint16_t Alignment{2};
+  uint16_t Alignment{MinAlign};
 
   /// Maximum number of bytes used for alignment of hot part of the function.
   uint16_t MaxAlignmentBytes{0};
@@ -348,6 +358,9 @@
   /// This attribute is only valid when hasCFG() == true.
   bool HasCanonicalCFG{true};
 
+  /// True if we suspect that the function was written in assembly
+  bool IsAsm{false};
+
   /// The address for the code for this function in codegen memory.
   /// Used for functions that are emitted in a dedicated section with a fixed
   /// address. E.g. for functions that are overwritten in-place.
@@ -408,7 +421,7 @@
   const MCSymbol *PLTSymbol{nullptr};
 
   /// Function order for streaming into the destination binary.
-  uint32_t Index{-1U};
+  uint32_t Index{INVALID_BF_INDEX};
 
   /// Get basic block index assuming it belongs to this function.
   unsigned getIndex(const BinaryBasicBlock *BB) const {
@@ -1086,15 +1099,14 @@
   uint64_t getOutputSize() const { return OutputSize; }
 
-  /// Does this function have a valid streaming order index?
-  bool hasValidIndex() const { return Index != -1U; }
+  /// Return true if this function has a valid streaming order index.
+  bool hasValidIndex() const { return Index != INVALID_BF_INDEX; }
 
   /// Get the streaming order index for this function.
   uint32_t getIndex() const { return Index; }
 
   /// Set the streaming order index for this function.
   void setIndex(uint32_t Idx) {
-    assert(!hasValidIndex());
     Index = Idx;
   }
@@ -1397,6 +1409,9 @@
   /// Return true if all CFG edges have local successors.
   bool hasCanonicalCFG() const { return HasCanonicalCFG; }
 
+  /// Return true if we suspect that the function was written in assembly.
+  bool isAsm() const { return IsAsm; }
+
   /// Return true if the original function code has all necessary relocations
   /// to track addresses of functions emitted to new locations.
   bool hasExternalRefRelocations() const { return HasExternalRefRelocations; }
@@ -1430,6 +1445,15 @@
   /// Return true if the body of the function was merged into another function.
   bool isFolded() const { return FoldedIntoFunction != nullptr; }
 
+  /// Set the golang functab offset for this function.
+  void setGolangFunctabOffset(uint32_t Offset) { GolangFunctabOffset = Offset; }
+
+  /// Return true if the function is present in the original go functab.
+  bool isGolang() const { return GolangFunctabOffset != 0; }
+
+  /// Get the golang functab offset for this function.
+  uint32_t getGolangFunctabOffset() const { return GolangFunctabOffset; }
+
   /// If this function was folded, return the function it was folded into.
   BinaryFunction *getFoldedIntoFunction() const { return FoldedIntoFunction; }
@@ -1823,6 +1847,8 @@
   void setHasCanonicalCFG(bool V) { HasCanonicalCFG = V; }
 
+  void setIsAsm(bool V) { IsAsm = V; }
+
   void setFolded(BinaryFunction *BF) { FoldedIntoFunction = BF; }
 
   BinaryFunction &setPersonalityFunction(uint64_t Addr) {
@@ -2344,7 +2370,7 @@
   size_t estimateSize() const {
     size_t Estimate = 0;
     for (const BinaryBasicBlock *BB : BasicBlocksLayout)
-      Estimate += BC.computeCodeSize(BB->begin(), BB->end());
+      Estimate += BB->estimateSize();
     return Estimate;
   }
diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h
--- a/bolt/include/bolt/Core/BinarySection.h
+++ b/bolt/include/bolt/Core/BinarySection.h
@@ -27,6 +27,7 @@
 #include <map>
 #include <memory>
 #include <set>
+#include
 
 namespace llvm {
 class MCStreamer;
@@ -90,6 +91,8 @@
   // Set by ExecutableFileMemoryManager.
   uint32_t Index{0};               // Section index in the output file.
   mutable bool IsReordered{false}; // Have the contents been reordered?
+  mutable bool IsChanged{false};   // OutputContents is changed, but has the
+                                   // same size as Contents
   bool IsAnonymous{false};         // True if the name should not be included
                                    // in the output file.
@@ -257,6 +260,7 @@
     }
   }
   bool isReordered() const { return IsReordered; }
+  bool isChanged() const { return IsChanged; }
   bool isAnonymous() const { return IsAnonymous; }
   unsigned getELFType() const { return ELFType; }
   unsigned getELFFlags() const { return ELFFlags; }
@@ -312,6 +316,9 @@
   /// Does this section have any pending relocations?
   bool hasPendingRelocations() const { return !PendingRelocations.empty(); }
 
+  /// Does this section have any dynamic relocations?
+  bool hasDynamicRelocations() const { return !DynamicRelocations.empty(); }
+
   /// Remove non-pending relocation with the given /p Offset.
   bool removeRelocationAt(uint64_t Offset) {
     auto Itr = Relocations.find(Offset);
@@ -319,6 +326,19 @@
       Relocations.erase(Itr);
       return true;
     }
+
+    return false;
+  }
+
+  /// Remove the dynamic relocation (if any) at the given /p Offset.
+  bool removeDynamicRelocationAt(uint64_t Offset) {
+    Relocation Key{Offset, 0, 0, 0, 0};
+    auto Itr = DynamicRelocations.find(Key);
+    if (Itr != DynamicRelocations.end()) {
+      DynamicRelocations.erase(Itr);
+      return true;
+    }
+
     return false;
   }
@@ -326,7 +346,7 @@
   /// Add a new relocation at the given /p Offset.
   void addRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
-                     uint64_t Addend, uint64_t Value = 0,
+                     uint64_t Addend = 0, uint64_t Value = 0,
                      bool Pending = false) {
     assert(Offset < getSize() && "offset not within section bounds");
     if (!Pending) {
@@ -339,7 +359,7 @@
   /// Add a dynamic relocation at the given /p Offset.
   void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
-                            uint64_t Addend, uint64_t Value = 0) {
+                            uint64_t Addend = 0, uint64_t Value = 0) {
     assert(Offset < getSize() && "offset not within section bounds");
     DynamicRelocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
   }
@@ -368,7 +388,7 @@
     return Itr != Relocations.end() ? &*Itr : nullptr;
   }
 
-  /// Lookup the relocation (if any) at the given /p Offset.
+  /// Lookup the dynamic relocation (if any) at the given /p Offset.
   const Relocation *getDynamicRelocationAt(uint64_t Offset) const {
     Relocation Key{Offset, 0, 0, 0, 0};
     auto Itr = DynamicRelocations.find(Key);
@@ -395,6 +415,11 @@
   const uint8_t *getOutputData() const {
     return reinterpret_cast<const uint8_t *>(getOutputContents().data());
   }
+  void setOutputContents(uint8_t *Data, uint64_t Size) {
+    IsChanged = true;
+    OutputSize = Size;
+    OutputContents = StringRef(reinterpret_cast<char *>(Data), Size);
+  }
   StringRef getOutputContents() const { return OutputContents; }
   uint64_t getAllocAddress() const {
     return reinterpret_cast<uint64_t>(getOutputData());
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -280,8 +280,7 @@
                 const MCRegisterInfo *RegInfo)
       : Analysis(Analysis), Info(Info), RegInfo(RegInfo) {
     // Initialize the default annotation allocator with id 0
-    AnnotationAllocators.emplace(0, AnnotationAllocator());
-    MaxAllocatorId++;
+    AnnotationAllocators.emplace(MaxAllocatorId++, AnnotationAllocator());
   }
 
   /// Initialize a new annotation allocator and return its id
@@ -900,6 +899,12 @@
     llvm_unreachable("not implemented");
   }
 
+  /// Get the stack adjustment value
+  virtual int getStackAdjustment(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return 0;
+  }
+
   /// Identify stack adjustment instructions -- those that change the stack
   /// pointer by adding or subtracting an immediate.
   virtual bool isStackAdjustment(const MCInst &Inst) const {
@@ -1293,6 +1298,12 @@
     return false;
   }
 
+  /// Replace the instruction with its relaxed version
+  virtual bool relaxInstruction(MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   /// Lower a tail call instruction \p Inst if required by target.
   virtual bool lowerTailCall(MCInst &Inst) {
     llvm_unreachable("not implemented");
@@ -1641,6 +1652,21 @@
     return Index;
   }
 
+  /// Copy annotations from one instruction to another
+  void copyAnnotationInst(const MCInst &From, MCInst &To,
+                          AllocatorIdTy AllocatorId = 0) {
+    const MCInst *AnnotationInst = getAnnotationInst(From);
+    if (!AnnotationInst)
+      return;
+
+    for (unsigned I = 0; I < AnnotationInst->getNumOperands(); ++I) {
+      const int64_t Imm = AnnotationInst->getOperand(I).getImm();
+      const unsigned Index = extractAnnotationIndex(Imm);
+      const int64_t Value = extractAnnotationValue(Imm);
+      setAnnotationOpValue(To, Index, Value, AllocatorId);
+    }
+  }
+
   /// Store an annotation value on an MCInst. This assumes the annotation
   /// is not already present.
   template <typename ValueType>
@@ -1831,6 +1857,13 @@
     return InstructionListType();
   }
 
+  virtual InstructionListType
+  createInstrumentFiniCall(MCSymbol *HandlerFuncAddr, MCContext *Ctx,
+                           bool IsTailCall) {
+    llvm_unreachable("not implemented");
+    return std::vector<MCInst>();
+  }
+
   virtual InstructionListType createNumCountersGetter(MCContext *Ctx) const {
     llvm_unreachable("not implemented");
     return {};
   }
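copyAnnotationInst() is what lets later passes keep MCPlus annotations alive
when an instruction is rewritten. A minimal usage sketch, not part of the
patch; `Old` is assumed to be an existing annotated MCInst and `BC` a
BinaryContext:

  MCInst New;
  BC.MIB->createNoop(New);
  // New now carries the same annotation index/value pairs as Old, so
  // metadata such as the VARINT* annotations survives the replacement.
  BC.MIB->copyAnnotationInst(Old, New);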
diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h
--- a/bolt/include/bolt/Core/Relocation.h
+++ b/bolt/include/bolt/Core/Relocation.h
@@ -98,6 +98,18 @@
   /// Return code for a PC-relative 8-byte relocation
   static uint64_t getPC64();
 
+  /// Return code for an ABS 4-byte relocation
+  static uint64_t getAbs32();
+
+  /// Return code for an ABS 8-byte relocation
+  static uint64_t getAbs64();
+
+  /// Return code for an ABS relocation based on size
+  static uint64_t getAbs(uint8_t Size);
+
+  /// Return code for a RELATIVE relocation
+  static uint64_t getRelative();
+
   /// Return true if this relocation is PC-relative. Return false otherwise.
   bool isPCRelative() const { return isPCRelative(Type); }
 
@@ -105,6 +117,9 @@
   /// otherwise.
   bool isRelative() const { return isRelative(Type); }
 
+  /// Return true if this relocation is IRELATIVE type. Return false otherwise.
+  bool isIRelative() const { return isIRelative(Type); }
+
   /// Emit relocation at a current \p Streamer' position. The caller is
   /// responsible for setting the position correctly.
   size_t emit(MCStreamer *Streamer) const;
diff --git a/bolt/include/bolt/Passes/Golang.h b/bolt/include/bolt/Passes/Golang.h
new file mode 100644
--- /dev/null
+++ b/bolt/include/bolt/Passes/Golang.h
@@ -0,0 +1,145 @@
+//===--------- Passes/Golang.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Passes to support Golang binaries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_GOLANG_H
+#define LLVM_TOOLS_LLVM_BOLT_PASSES_GOLANG_H
+
+#include "BinaryPasses.h"
+#include "bolt/Utils/CommandLineOpts.h"
+
+#include "Golang/go_v1_14.h"
+#include "Golang/go_v1_16_5.h"
+#include "Golang/go_v1_17.h"
+
+namespace llvm {
+namespace bolt {
+
+class GolangPass : public BinaryFunctionPass {
+protected:
+  uint64_t RuntimeText = 0;
+  std::unique_ptr<Module> FirstModule;
+  std::unique_ptr<class Pclntab> Pclntab;
+
+public:
+  /// Golang version strings
+  const char *GolangStringVer[opts::GV_LAST] = {
+      "none", "auto", "go1.14.9", "go1.14.12",
+      "go1.16.5", "go1.17.2", "go1.17.5",
+  };
+
+  explicit GolangPass(BinaryContext &BC) : BinaryFunctionPass(false) {
+    if (checkGoVersion(BC) < 0) {
+      errs() << "BOLT-ERROR: Failed to check golang version!\n";
+      exit(1);
+    }
+
+    if (getSymbols(BC) < 0) {
+      errs() << "BOLT-ERROR: Failed to get golang-specific symbols!\n";
+      exit(1);
+    }
+
+    FirstModule = createGoModule();
+    if (!FirstModule || FirstModule->read(BC) < 0) {
+      errs() << "BOLT-ERROR: Failed to read firstmodule!\n";
+      exit(1);
+    }
+
+    Pclntab = createGoPclntab();
+    if (!Pclntab || Pclntab->readHeader(BC, getPcHeaderAddr()) < 0) {
+      errs() << "BOLT-ERROR: Failed to read pclntab!\n";
+      exit(1);
+    }
+
+    // NOTE: the last entry is etext
+    if (Pclntab->getFunctionsCount() != FirstModule->getFtab().getCount() - 1) {
+      errs() << "BOLT-ERROR: Wrong symtab size!\n";
+      exit(1);
+    }
+  }
+
+  std::unique_ptr<Module> createGoModule();
+
+  std::unique_ptr<class Pclntab> createGoPclntab();
+
+  std::unique_ptr<GoFunc> createGoFunc();
+
+  static const char *getFirstBFName(void) {
+    const char *const Name = "runtime.text";
+    return Name;
+  }
+
+  static const char *getLastBFName(void) {
+    const char *const Name = "runtime.etext";
+    return Name;
+  }
+
+  static uint32_t getUndAarch64(void) { return 0xbea71700; }
+
+  uint64_t getPcHeaderAddr() const { return FirstModule->getPcHeaderAddr(); }
+
+  uint8_t getPsize() const { return Pclntab->getPsize(); }
+
+  const class Pclntab *getPclntab() const {
+    return const_cast<const class Pclntab *>(Pclntab.get());
+  }
+
+  const char *getName() const override { return "golang"; }
+
+  /// Pass entry point
+  void runOnFunctions(BinaryContext &BC) override;
+  int checkGoVersion(BinaryContext &BC);
+  int getSymbols(BinaryContext &BC);
+  int textsectmapPass(BinaryContext &BC);
+  int typePass(BinaryContext &BC, uint64_t TypeAddr);
+  int typelinksPass(BinaryContext &BC);
+  int pcspPass(BinaryFunction *BF, uint8_t **SectionData, const uint32_t Index,
+               uint8_t Quantum, bool ForcePreserve);
+  uint32_t deferreturnPass(BinaryContext &BC, BinaryFunction *BF);
+  int getNextMCinstVal(BinaryFunction::order_iterator BBIt, uint64_t I,
+                       const uint32_t Index, int32_t &Val,
+                       uint64_t *NextOffset);
+  int writeVarintPass(BinaryFunction *BF, uint8_t **DataFuncOffset,
+                      const uint32_t Index, const uint8_t Quantum);
+  int pclntabPass(BinaryContext &BC);
+  int findFuncTabPass(BinaryContext &BC);
+};
+
+class GolangPostPass : public GolangPass {
+public:
+  explicit GolangPostPass(BinaryContext &BC) : GolangPass(BC) {}
+
+  /// Pass entry point
+  void runOnFunctions(BinaryContext &BC) override;
+  void skipPleaseUseCallersFramesPass(BinaryContext &BC);
+  void instrumentExitCall(BinaryContext &BC);
+  uint32_t pcdataPass(BinaryFunction *BF, GoFunc *GoFunc, const uint32_t Index,
+                      const unsigned AllocId);
+  int pclntabPass(BinaryContext &BC);
+};
+
+class GolangPrePass : public GolangPass {
+public:
+  explicit GolangPrePass(BinaryContext &BC) : GolangPass(BC) {}
+
+  /// Pass entry point
+  void runOnFunctions(BinaryContext &BC) override;
+  void goPassInit(BinaryContext &BC);
+  void nopPass(BinaryContext &BC);
+  int pclntabPass(BinaryContext &BC);
+  void deferreturnPass(BinaryFunction &BF, const uint64_t DeferOffset,
+                       const unsigned AllocId, const MCCodeEmitter *Emitter);
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
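As the class hierarchy suggests, the work appears split into a
pre-optimization pass (GolangPrePass), the main pass, and a post-emission pass
(GolangPostPass), all sharing the version probing done in the GolangPass
constructor. A minimal driver sketch under that assumption; the actual wiring
lives in the rewriter sources, not in this header:

  // Hypothetical call sites, shown only to illustrate the intended ordering.
  GolangPrePass(BC).runOnFunctions(BC);  // annotate before optimizations
  // ... the regular BOLT optimization pipeline runs here ...
  GolangPostPass(BC).runOnFunctions(BC); // rewrite pclntab and friends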
diff --git a/bolt/include/bolt/Passes/Golang/go_base.h b/bolt/include/bolt/Passes/Golang/go_base.h
new file mode 100644
--- /dev/null
+++ b/bolt/include/bolt/Passes/Golang/go_base.h
@@ -0,0 +1,209 @@
+//===--------- Passes/Golang/go_base.h --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_BOLT_GOLANG_BASE_H
+#define LLVM_TOOLS_LLVM_BOLT_GOLANG_BASE_H
+
+#include "go_common.h"
+
+namespace llvm {
+namespace bolt {
+
+// runtime/symtab.go
+struct Functab {
+  uint64_t Address; // Pointer size
+  uint64_t Offset;  // Pointer size
+};
+
+// runtime/symtab.go
+struct InlinedCall {
+  int16_t Parent;
+  uint8_t FuncID;
+  uint8_t Unused;
+  uint32_t File;
+  uint32_t Line;
+  uint32_t Func;
+  uint32_t ParentPc;
+};
+
+class GoArray {
+public:
+  uint64_t Address;
+  uint64_t Count[2];
+
+  uint64_t getAddress() const { return Address; }
+
+  void setAddress(uint64_t Addr) { Address = Addr; }
+
+  uint64_t getCount() const { return Count[0]; }
+
+  void setCount(uint64_t C) {
+    Count[0] = C;
+    Count[1] = C;
+  }
+};
+
+// runtime/symtab.go
+class Pclntab {
+  uint64_t PclntabHeaderOffset = 0;
+
+  virtual void __readHeader(BinaryContext &BC, DataExtractor &DE) = 0;
+  virtual void __writeHeader(BinaryContext &BC, uint8_t *Pclntab) const = 0;
+  virtual bool checkMagic() const = 0;
+  virtual void setNewHeaderOffsets() = 0;
+
+protected:
+  void setPclntabHeaderOffset(uint64_t Off) { PclntabHeaderOffset = Off; }
+
+  uint64_t getPclntabHeaderOffset() const { return PclntabHeaderOffset; }
+
+public:
+  virtual ~Pclntab() = 0;
+  int readHeader(BinaryContext &BC, const uint64_t PclntabHeaderAddr);
+  int writeHeader(BinaryContext &BC, uint8_t *Pclntab);
+  virtual size_t getPcHeaderSize() const = 0;
+  virtual void setFunctionsCount(uint64_t Count) = 0;
+  virtual uint8_t getQuantum() const = 0;
+  virtual uint8_t getPsize() const = 0;
+  virtual uint64_t getFunctionsCount() const = 0;
+  virtual uint64_t getNameOffset() const = 0;
+  virtual uint64_t getFiletabOffset() const = 0;
+  virtual uint64_t getPctabOffset() const = 0;
+  virtual uint64_t getPclntabOffset() const = 0;
+  virtual uint64_t getFunctabOffset() const = 0;
+};
+
+// runtime/runtime2.go
+struct GoFunc {
+  virtual ~GoFunc() = 0;
+
+  uint64_t PcdataOffset = 0;
+  uint64_t FuncdataOffset = 0;
+
+  virtual void __read(BinaryContext &BC, DataExtractor &DE,
+                      BinarySection *Section, uint64_t *FuncOffset) = 0;
+
+  virtual void __write(BinaryFunction *BF, uint8_t **FuncPart, uint8_t *Section,
+                       BinarySection *OutputSection) const = 0;
+
+  virtual size_t getSize(BinaryContext &BC) const = 0;
+
+  int read(BinaryContext &BC, DataExtractor &DE, BinarySection *Section,
+           uint64_t *FuncOffset) {
+    __read(BC, DE, Section, FuncOffset);
+
+    // Read pcdata
+    PcdataOffset = *FuncOffset;
+    for (uint32_t I = 0; I < getNpcdata(); ++I)
+      setPcdata(I, (uint32_t)readEndianVal(DE, FuncOffset, sizeof(uint32_t)));
+
+    // Read funcdata
+    *FuncOffset = alignTo(*FuncOffset, BC.AsmInfo->getCodePointerSize());
+    FuncdataOffset = *FuncOffset;
+    for (uint32_t I = 0; I < getNfuncdata(); ++I)
+      setFuncdata(
+          I, readEndianVal(DE, FuncOffset, BC.AsmInfo->getCodePointerSize()));
+
+    return 0;
+  }
+
+  int write(BinaryFunction *BF, uint8_t **FuncPart, uint8_t *SectionData,
+            BinarySection *OutputSection) {
+    BinaryContext &BC = BF->getBinaryContext();
+    __write(BF, FuncPart, SectionData, OutputSection);
+
+    // Write pcdata
+    for (uint32_t I = 0; I < getNpcdata(); ++I)
+      writeEndianVal(BC, getPcdata(I), sizeof(uint32_t), FuncPart);
+
+    // Write funcdata
+    *FuncPart = SectionData + alignTo(*FuncPart - SectionData,
+                                      BC.AsmInfo->getCodePointerSize());
+    for (uint32_t I = 0; I < getNfuncdata(); ++I) {
+      uint64_t Val = getFuncdata(I);
+      if (Val) {
+        uint64_t Delta = (uint64_t)(*FuncPart - SectionData);
+        AddRelaReloc(BC, nullptr, OutputSection, Delta, Val);
+        *FuncPart += BC.AsmInfo->getCodePointerSize();
+      } else {
+        writeEndianPointer(BC, 0, FuncPart);
+      }
+    }
+    return 0;
+  }
+
+  virtual void disableMetadata() = 0;
+  virtual int32_t getNameOffset() const = 0;
+  virtual void setNameOffset(int32_t Offset) = 0;
+  virtual uint32_t getDeferreturnOffset() const = 0;
+  virtual void setDeferreturnOffset(uint32_t Offset) = 0;
+  virtual uint32_t getPcspOffset() const = 0;
+  virtual void setPcspOffset(uint32_t Offset) = 0;
+  virtual uint32_t getNpcdata() const = 0;
+  virtual void fixNpcdata() = 0;
+  virtual bool hasReservedID(std::string Name) const = 0;
+  virtual uint8_t getNfuncdata() const = 0;
+
+  // runtime/symtab.go
+  virtual unsigned getPcdataUnsafePointIndex() const = 0;
+  virtual unsigned getPcdataStackMapIndex() const = 0;
+  virtual unsigned getPcdataInlTreeIndex() const = 0;
+  virtual unsigned getPcdataMaxIndex() const = 0;
+  virtual size_t getPcdataSize() const = 0;
+  virtual uint32_t getPcdata(unsigned Index) const = 0;
+  virtual void setPcdata(unsigned Index, uint32_t Value) = 0;
+  virtual void setPcdataMaxVal(unsigned Index, int32_t Value) = 0;
+  virtual int32_t getPcdataMax(unsigned Index) const = 0;
+  virtual int getPcdataUnsafePointVal() const = 0;
+  virtual unsigned getFuncdataInlTreeIndex() const = 0;
+  virtual uint64_t getFuncdata(unsigned Index) const = 0;
+  virtual void setFuncdata(unsigned Index, uint64_t Value) = 0;
+
+  uint32_t getPcdataOffset() const { return PcdataOffset; }
+
+  uint32_t getFuncdataOffset() const { return FuncdataOffset; }
+
+  // Fake index used to store pcsp values
+  unsigned getPcspIndex() const { return getPcdataMaxIndex() + 1; }
+};
+
+// moduledata struct
+// runtime/symtab.go
+// NOTE: Every field's size is the target machine's pointer size
+// NOTE: For some reason the count of array[] fields is duplicated in the struct
+struct Module {
+  virtual ~Module() = 0;
+
+  BinaryData *getModuleBD(BinaryContext &BC) {
+    BinaryData *Module = BC.getFirstBinaryDataByName("local.moduledata");
+    if (!Module)
+      Module = BC.getFirstBinaryDataByName("runtime.firstmoduledata");
+
+    return Module;
+  }
+
+  int read(BinaryContext &BC);
+  virtual uint64_t getFieldOffset(BinaryContext &BC, uint64_t *Addr) const = 0;
+  virtual int patch(BinaryContext &BC) = 0;
+  virtual uint64_t *getModule() = 0;
+  virtual size_t getModuleSize() const = 0;
+  virtual void setPclntabSize(uint64_t Size) = 0;
+  virtual void setFtabSize(uint64_t Count) = 0;
+  virtual uint64_t getPcHeaderAddr() const = 0;
+  virtual const GoArray &getFtab() const = 0;
+  virtual uint64_t getFindfunctab() const = 0;
+  virtual uint64_t getTypes() const = 0;
+  virtual uint64_t getEtypes() const = 0;
+  virtual const GoArray &getTextsectmap() const = 0;
+  virtual const GoArray &getTypelinks() const = 0;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
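GoFunc::read()/write() above imply a fixed on-disk layout for each functab
entry: the version-specific _func struct, then npcdata 32-bit pcdata offsets,
then nfuncdata pointers aligned to the pointer size. A sketch of the resulting
entry size under that reading (the helper name is ours, not part of the
patch):

  size_t funcEntrySize(const GoFunc &F, BinaryContext &BC) {
    const unsigned Psize = BC.AsmInfo->getCodePointerSize();
    size_t Size = F.getSize(BC);               // fixed _func fields
    Size += F.getNpcdata() * sizeof(uint32_t); // pcdata offset table
    Size = alignTo(Size, Psize);               // funcdata is pointer-aligned
    Size += F.getNfuncdata() * Psize;          // funcdata pointers
    return Size;
  }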
diff --git a/bolt/include/bolt/Passes/Golang/go_common.h b/bolt/include/bolt/Passes/Golang/go_common.h
new file mode 100644
--- /dev/null
+++ b/bolt/include/bolt/Passes/Golang/go_common.h
@@ -0,0 +1,156 @@
+//===--------- Passes/Golang/go_common.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_BOLT_GOLANG_COMMON_H
+#define LLVM_TOOLS_LLVM_BOLT_GOLANG_COMMON_H
+
+namespace llvm {
+namespace bolt {
+
+BinaryFunction *getBF(BinaryContext &BC, std::vector<BinaryFunction *> &BFs,
+                      const char *Name);
+
+BinaryFunction *getFirstBF(BinaryContext &BC,
+                           std::vector<BinaryFunction *> &BFs);
+
+BinaryFunction *getLastBF(BinaryContext &BC,
+                          std::vector<BinaryFunction *> &BFs);
+
+void writeEndianVal(BinaryContext &BC, uint64_t Val, uint16_t Size,
+                    uint8_t **Res);
+
+void writeEndianPointer(BinaryContext &BC, uint64_t Val, uint8_t **Res);
+
+inline uint64_t readEndianVal(DataExtractor &DE, uint64_t *Offset,
+                              uint16_t Size) {
+  assert(DE.isValidOffset(*Offset) && "Invalid offset");
+  switch (Size) {
+  case 8:
+    return DE.getU64(Offset);
+  case 4:
+    return DE.getU32(Offset);
+  case 2:
+    return DE.getU16(Offset);
+  case 1:
+    return DE.getU8(Offset);
+  default:
+    __builtin_unreachable();
+  }
+}
+
+inline uint32_t readVarint(const uint8_t *Data, uint64_t *Offset) {
+  uint32_t Res = 0, Shift = 0;
+  uint8_t Val;
+
+  while (1) {
+    Val = Data[(*Offset)++];
+    Res |= ((uint32_t)(Val & 0x7F)) << (Shift & 31);
+    if ((Val & 0x80) == 0)
+      break;
+
+    Shift += 7;
+  }
+
+  return Res;
+}
+
+inline int32_t readVarintPair(const uint8_t *Data, uint64_t *Offset,
+                              int32_t &ValSum, uint64_t &OffsetSum,
+                              const uint8_t Quantum) {
+  uint32_t Val = readVarint(Data, Offset);
+  ValSum += (-(Val & 1) ^ (Val >> 1));
+  OffsetSum += readVarint(Data, Offset) * Quantum;
+  return (int32_t)Val;
+}
+
+inline int32_t readVarintPair(DataExtractor &DE, uint64_t *Offset,
+                              int32_t &ValSum, uint64_t &OffsetSum,
+                              const uint8_t Quantum) {
+  assert(DE.isValidOffset(*Offset));
+  const uint8_t *Data = (const uint8_t *)DE.getData().data();
+  return readVarintPair(Data, Offset, ValSum, OffsetSum, Quantum);
+}
+
+inline void AddRelaReloc(BinaryContext &BC, MCSymbol *Symbol,
+                         BinarySection *Section, uint64_t Offset,
+                         uint64_t Addend) {
+  Section->addRelocation(Offset, Symbol,
+                         Relocation::getAbs(BC.AsmInfo->getCodePointerSize()),
+                         Addend);
+
+  if (!BC.HasFixedLoadAddress)
+    Section->addDynamicRelocation(Offset, Symbol, Relocation::getRelative(),
+                                  Addend);
+}
+
+inline void RemoveRelaReloc(BinaryContext &BC, BinarySection *Section,
+                            uint64_t Offset) {
+  Section->removeRelocationAt(Offset);
+  Section->removeDynamicRelocationAt(Offset);
+}
+
+inline std::string getVarintName(uint32_t Index, bool IsNext = false) {
+  const char *const Varint = "VARINT";
+  const char *const VarintNext = "VARINT_NEXT";
+
+  std::string Name = IsNext ? VarintNext : Varint;
+  Name += std::to_string(Index);
+  return Name;
+}
+
+inline void addVarintAnnotation(BinaryContext &BC, MCInst &II, uint32_t Index,
+                                int32_t Value, bool IsNext,
+                                unsigned AllocId = 0) {
+  BC.MIB->addAnnotation(II, getVarintName(Index, IsNext), Value, AllocId);
+}
+
+inline bool hasVarintAnnotation(BinaryContext &BC, MCInst &II, uint32_t Index,
+                                bool IsNext = false) {
+  return BC.MIB->hasAnnotation(II, getVarintName(Index, IsNext));
+}
+
+inline int32_t getVarintAnnotation(BinaryContext &BC, MCInst &II,
+                                   uint32_t Index, bool IsNext = false) {
+  return BC.MIB->getAnnotationAs<int32_t>(II, getVarintName(Index, IsNext));
+}
+
+inline std::string getFuncdataName(uint32_t Findex, uint32_t Size) {
+  return "FUNCDATA" + std::to_string(Findex) + std::to_string(Size);
+}
+
+inline std::string getFuncdataSizeName(uint32_t Findex) {
+  return "FUNCDATA_SIZE" + std::to_string(Findex);
+}
+
+inline void addFuncdataAnnotation(BinaryContext &BC, MCInst &II,
+                                  uint32_t Findex, int32_t Value,
+                                  unsigned AllocId = 0) {
+  auto &Size = BC.MIB->getOrCreateAnnotationAs<uint32_t>(
+      II, getFuncdataSizeName(Findex), AllocId);
+  BC.MIB->addAnnotation(II, getFuncdataName(Findex, Size++), Value, AllocId);
+}
+
+inline bool hasFuncdataAnnotation(BinaryContext &BC, MCInst &II,
+                                  uint32_t Findex) {
+  return BC.MIB->hasAnnotation(II, getFuncdataSizeName(Findex));
+}
+
+inline uint32_t getFuncdataSizeAnnotation(BinaryContext &BC, MCInst &II,
+                                          uint32_t Findex) {
+  return BC.MIB->getAnnotationAs<uint32_t>(II, getFuncdataSizeName(Findex));
+}
+
+inline int32_t getFuncdataAnnotation(BinaryContext &BC, MCInst &II,
+                                     uint32_t Findex, uint32_t Index) {
+  return BC.MIB->getAnnotationAs<int32_t>(II, getFuncdataName(Findex, Index));
+}
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
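The pcdata tables decoded by readVarintPair() use Go's zig-zag delta encoding:
the value delta is -(v & 1) ^ (v >> 1) and the PC delta is a plain varint
scaled by the quantum. A small worked example of that decoding (the buffer
contents are illustrative):

  // Bytes {0x05, 0x02}: 0x05 zig-zag decodes to -3 (-(5 & 1) ^ (5 >> 1) = ~2),
  // and 0x02 advances the PC by 2 * Quantum.
  const uint8_t Data[] = {0x05, 0x02};
  uint64_t Off = 0;
  int32_t ValSum = 0;
  uint64_t OffSum = 0;
  readVarintPair(Data, &Off, ValSum, OffSum, /*Quantum=*/1);
  assert(ValSum == -3 && OffSum == 2);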
diff --git a/bolt/include/bolt/Passes/Golang/go_v1_14.h b/bolt/include/bolt/Passes/Golang/go_v1_14.h
new file mode 100644
--- /dev/null
+++ b/bolt/include/bolt/Passes/Golang/go_v1_14.h
@@ -0,0 +1,484 @@
+//===--------- Passes/Golang/go_v1_14.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_BOLT_GOLANG_V1_14_H
+#define LLVM_TOOLS_LLVM_BOLT_GOLANG_V1_14_H
+
+#include "go_base.h"
+
+namespace llvm {
+namespace bolt {
+
+class Pclntab_v1_14_9 : public Pclntab {
+// runtime/symtab.go moduledataverify1
+#define PclntabFields \
+  F(uint32_t, false, Magic) \
+  F(uint8_t, false, Zero1) \
+  F(uint8_t, false, Zero2) \
+  F(uint8_t, false, Quantum) \
+  F(uint8_t, false, Psize) \
+  F(uint64_t, true, SymtabSize)
+
+  struct PcHeader {
+#define F(Type, IsPointerSize, Field) Type Field;
+    PclntabFields
+#undef F
+  } Header;
+
+  void __readHeader(BinaryContext &BC, DataExtractor &DE) override {
+    uint64_t Offset = getPclntabHeaderOffset();
+#define F(Type, IsPointer, Field) \
+  { \
+    assert(DE.isValidOffset(Offset) && "Invalid offset"); \
+    size_t FSize = \
+        IsPointer ? BC.AsmInfo->getCodePointerSize() : sizeof(Type); \
+    Header.Field = readEndianVal(DE, &Offset, FSize); \
+  }
+    PclntabFields
+#undef F
+  }
+
+  void __writeHeader(BinaryContext &BC, uint8_t *Pclntab) const override {
+#define F(Type, IsPointer, Field) \
+  { \
+    size_t FSize = \
+        IsPointer ? BC.AsmInfo->getCodePointerSize() : sizeof(Type); \
+    writeEndianVal(BC, Header.Field, FSize, &Pclntab); \
+  }
+    PclntabFields
+#undef F
+  }
+
+  void setNewHeaderOffsets() override {}
+
+  bool checkMagic() const override { return Header.Magic == 0xfffffffb; }
+
+public:
+  ~Pclntab_v1_14_9() = default;
+
+  static size_t getPcHeaderSize(unsigned Psize) {
+    size_t FuncSize = 0;
+#define F(Type, IsPointerSize, Field) \
+  if (IsPointerSize) \
+    FuncSize += Psize; \
+  else \
+    FuncSize += sizeof(Type);
+    PclntabFields
+#undef F
+
+    return alignTo(FuncSize, Psize);
+  }
+
+#undef PclntabFields
+
+  size_t getPcHeaderSize() const override {
+    return getPcHeaderSize(Header.Psize);
+  }
+
+  void setFunctionsCount(uint64_t Count) override { Header.SymtabSize = Count; }
+
+  uint8_t getQuantum() const override { return Header.Quantum; }
+
+  uint8_t getPsize() const override { return Header.Psize; }
+
+  uint64_t getFunctionsCount() const override { return Header.SymtabSize; }
+
+  uint64_t getNameOffset() const override { return getPclntabHeaderOffset(); }
+
+  uint64_t getFiletabOffset() const override {
+    return getPclntabHeaderOffset();
+  }
+
+  uint64_t getPctabOffset() const override { return getPclntabHeaderOffset(); }
+
+  uint64_t getPclntabOffset() const override {
+    return getPclntabHeaderOffset() + getPcHeaderSize();
+  }
+
+  uint64_t getFunctabOffset() const override {
+    return getPclntabHeaderOffset();
+  }
+};
+
+struct GoFunc_v1_14_9 : GoFunc {
+  ~GoFunc_v1_14_9() = default;
+
+  // runtime/symtab.go
+  enum {
+    _PCDATA_RegMapIndex = 0,
+    _PCDATA_StackMapIndex = 1,
+    _PCDATA_InlTreeIndex = 2,
+    _PCDATA_MAX,
+    _FUNCDATA_ArgsPointerMaps = 0,
+    _FUNCDATA_LocalsPointerMaps = 1,
+    _FUNCDATA_RegPointerMaps = 2,
+    _FUNCDATA_StackObjects = 3,
+    _FUNCDATA_InlTree = 4,
+    _FUNCDATA_OpenCodedDeferInfo = 5,
+    _FUNCDATA_MAX,
+    _ArgsSizeUnknown = -0x80000000
+  };
+
+  // runtime/symtab.go
+  enum {
+    funcID_normal = 0,
+    funcID_runtime_main,
+    funcID_goexit,
+    funcID_jmpdefer,
+    funcID_mcall,
+    funcID_morestack,
+    funcID_mstart,
+    funcID_rt0_go,
+    funcID_asmcgocall,
+    funcID_sigpanic,
+    funcID_runfinq,
+    funcID_gcBgMarkWorker,
+    funcID_systemstack_switch,
+    funcID_systemstack,
+    funcID_cgocallback_gofunc,
+    funcID_gogo,
+    funcID_externalthreadhandler,
+    funcID_debugCallV1,
+    funcID_gopanic,
+    funcID_panicwrap,
+    funcID_handleAsyncEvent,
+    funcID_asyncPreempt,
+    funcID_wrapper
+  };
+
+#define FuncFields \
+  F(uint64_t, true, Entry) \
+  F(int32_t, false, Name) \
+  F(uint32_t, false, Args) \
+  F(uint32_t, false, Deferreturn) \
+  F(int32_t, false, Pcsp) \
+  F(int32_t, false, Pcfile) \
+  F(int32_t, false, Pcln) \
+  F(uint32_t, false, Npcdata) \
+  F(uint8_t, false, FuncID) \
+  F(uint8_t, false, Unused1) \
+  F(uint8_t, false, Unused2) \
+  F(uint8_t, false, Nfuncdata)
+
+  struct _Func {
+#define F(Type, IsPointer, Field) Type Field;
+    FuncFields
+#undef F
+  } __GoFunc;
+
+  uint32_t Pcdata[_PCDATA_MAX] = {};
+  uint64_t Funcdata[_FUNCDATA_MAX] = {};
+
+  int32_t PcdataMax[_PCDATA_MAX] = {};
+
+  void __read(BinaryContext &BC, DataExtractor &DE, BinarySection *Section,
+              uint64_t *FuncOffset) override {
+#define F(Type, IsPointer, Field) \
+  { \
+    assert(DE.isValidOffset(*FuncOffset) && "Invalid offset"); \
+    size_t FSize = \
+        IsPointer ? BC.AsmInfo->getCodePointerSize() : sizeof(Type); \
+    if (IsPointer && Section) \
+      RemoveRelaReloc(BC, Section, *FuncOffset); \
+    __GoFunc.Field = readEndianVal(DE, FuncOffset, FSize); \
+  }
+    FuncFields
+#undef F
+  }
+
+  void __write(BinaryFunction *BF, uint8_t **FuncPart, uint8_t *SectionData,
+               BinarySection *OutputSection) const override {
+    BinaryContext &BC = BF->getBinaryContext();
+#define F(Type, IsPointer, Field) \
+  { \
+    if (IsPointer) { \
+      uint64_t Delta = (uint64_t)(*FuncPart - SectionData); \
+      AddRelaReloc(BC, BF->getSymbol(), OutputSection, Delta, 0); \
+      *FuncPart += BC.AsmInfo->getCodePointerSize(); \
+    } else { \
+      writeEndianVal(BC, __GoFunc.Field, sizeof(Type), FuncPart); \
+    } \
+  }
+    FuncFields
+#undef F
+  }
+
+  size_t getSize(BinaryContext &BC) const override {
+    size_t FuncSize = 0;
+#define F(Type, IsPointerSize, Field) \
+  if (IsPointerSize) \
+    FuncSize += BC.AsmInfo->getCodePointerSize(); \
+  else \
+    FuncSize += sizeof(Type);
+    FuncFields
+#undef F
+    return FuncSize;
+  }
+
+#undef FuncFields
+
+  void disableMetadata() override {
+    __GoFunc.Pcfile = 0;
+    __GoFunc.Pcln = 0;
+  }
+
+  int32_t getNameOffset() const override { return __GoFunc.Name; }
+
+  void setNameOffset(int32_t Offset) override { __GoFunc.Name = Offset; }
+
+  uint32_t getDeferreturnOffset() const override {
+    return __GoFunc.Deferreturn;
+  }
+
+  void setDeferreturnOffset(uint32_t Offset) override {
+    __GoFunc.Deferreturn = Offset;
+  }
+
+  uint32_t getPcspOffset() const override { return __GoFunc.Pcsp; }
+
+  void setPcspOffset(uint32_t Offset) override { __GoFunc.Pcsp = Offset; }
+
+  uint32_t getNpcdata() const override { return __GoFunc.Npcdata; }
+
+  void fixNpcdata() override {
+    for (int I = _PCDATA_MAX - 1; I >= 0; --I) {
+      if (Pcdata[I]) {
+        __GoFunc.Npcdata = I + 1;
+        return;
+      }
+    }
+
+    __GoFunc.Npcdata = 0;
+  }
+
+  bool hasReservedID(std::string Name) const override {
+    return __GoFunc.FuncID != funcID_normal &&
+           __GoFunc.FuncID != funcID_wrapper;
+  }
+
+  uint8_t getNfuncdata() const override { return __GoFunc.Nfuncdata; }
+
+  unsigned getPcdataUnsafePointIndex() const override {
+    return _PCDATA_RegMapIndex;
+  }
+
+  unsigned getPcdataStackMapIndex() const override {
+    return _PCDATA_StackMapIndex;
+  }
+
+  unsigned getPcdataInlTreeIndex() const override {
+    return _PCDATA_InlTreeIndex;
+  }
+
+  unsigned getPcdataMaxIndex() const override { return _PCDATA_MAX; }
+
+  size_t getPcdataSize() const override { return sizeof(Pcdata); }
+
+  uint32_t getPcdata(unsigned Index) const override {
+    assert(Index < _PCDATA_MAX && "Invalid index");
+    return Pcdata[Index];
+  }
+
+  void setPcdata(unsigned Index, uint32_t Value) override {
+    assert(Index < _PCDATA_MAX && "Invalid index");
+    Pcdata[Index] = Value;
+  }
+
+  void setPcdataMaxVal(unsigned Index, int32_t Value) override {
+    assert(Index < _PCDATA_MAX && "Invalid index");
+    PcdataMax[Index] = Value;
+  }
+
+  int32_t getPcdataMax(unsigned Index) const override {
+    assert(Index < _PCDATA_MAX && "Invalid index");
+    return PcdataMax[Index];
+  }
+
+  int getPcdataUnsafePointVal() const override {
+    const int Val = -2;
+    return Val;
+  }
+
+  unsigned getFuncdataInlTreeIndex() const override {
+    return _FUNCDATA_InlTree;
+  }
+
+  uint64_t getFuncdata(unsigned Index) const override {
+    assert(Index < _FUNCDATA_MAX && "Invalid index");
+    return Funcdata[Index];
+  }
+
+  void setFuncdata(unsigned Index, uint64_t Value) override {
+    assert(Index < _FUNCDATA_MAX && "Invalid index");
+    Funcdata[Index] = Value;
+  }
+};
+
+struct Module_v1_14_9 : Module {
+  ~Module_v1_14_9() = default;
+
+  union ModuleStruct {
+    struct {
+      GoArray pclntable;
+
+      GoArray ftab;
+
+      GoArray filetab;
+
+      uint64_t findfunctab;
+      uint64_t minpc, maxpc;
+      uint64_t text, etext;
+      uint64_t noptrdata, enoptrdata;
+      uint64_t data, edata;
+      uint64_t bss, ebss;
+      uint64_t noptrbss, enoptrbss;
+      uint64_t end, gcdata, gcbss;
+      uint64_t types, etypes;
+
+      GoArray textsectmap;
+
+      GoArray typelinks;
+
+      GoArray itablinks;
+
+      // Other fields are zeroed/unused in exec
+    } m;
+
+    uint64_t a[sizeof(m) / sizeof(uint64_t)];
+  } ModuleStruct;
+
+  uint64_t getFieldOffset(BinaryContext &BC, uint64_t *Addr) const override {
+    unsigned Psize = BC.AsmInfo->getCodePointerSize();
+    return (Addr - ModuleStruct.a) * Psize;
+  }
+
+  uint64_t *getModule() override { return ModuleStruct.a; }
+
+  size_t getModuleSize() const override { return sizeof(ModuleStruct.m); }
+
+  void setPclntabSize(uint64_t Size) override {
+    // Set pclntable size
+    ModuleStruct.m.pclntable.setCount(Size);
+  }
+
+  void setFtabSize(uint64_t Count) override {
+    // Fix ftab size; the last entry is reserved for maxpc
+    ModuleStruct.m.ftab.setCount(Count + 1);
+  }
+
+  uint64_t getPcHeaderAddr() const override {
+    return ModuleStruct.m.pclntable.getAddress();
+  }
+
+  const GoArray &getFtab() const override { return ModuleStruct.m.ftab; }
+
+  uint64_t getFindfunctab() const override {
+    return ModuleStruct.m.findfunctab;
+  }
+
+  uint64_t getTypes() const override { return ModuleStruct.m.types; }
+
+  uint64_t getEtypes() const override { return ModuleStruct.m.etypes; }
+
+  const GoArray &getTextsectmap() const override {
+    return ModuleStruct.m.textsectmap;
+  }
+
+  const GoArray &getTypelinks() const override {
+    return ModuleStruct.m.typelinks;
+  }
+
+  int patch(BinaryContext &BC) override {
+    BinaryData *Module = getModuleBD(BC);
+    if (!Module) {
+      errs() << "BOLT-ERROR: Failed to get firstmoduledata symbol!\n";
+      return -1;
+    }
+
+    BinarySection *Section = &Module->getSection();
+    std::vector<BinaryFunction *> BFs = BC.getSortedFunctions();
+    unsigned Psize = BC.AsmInfo->getCodePointerSize();
+
+#define getOffset(Field) \
+  (Module->getOffset() + getFieldOffset(BC, &ModuleStruct.m.Field))
+
+#define getValue(Field) (ModuleStruct.m.Field)
+
+    // Fix firstmoduledata pointers
+    BinaryData *PclntabSym = BC.getBinaryDataAtAddress(getPcHeaderAddr());
+    assert(PclntabSym && "PclntabSym absent");
+    BinaryData *FindfunctabSym = BC.getBinaryDataAtAddress(getFindfunctab());
+    assert(FindfunctabSym && "FindfunctabSym absent");
+    BinaryFunction *FirstBF = getFirstBF(BC, BFs);
+    assert(FirstBF && "Text BF absent");
+    BinaryFunction *LastBF = getLastBF(BC, BFs);
+    assert(LastBF && "Text BF absent");
+
+#define FirstmoduleFields \
+  F(pclntable.Address, PclntabSym, 0) \
+  F(ftab.Address, PclntabSym, Pclntab_v1_14_9::getPcHeaderSize(Psize)) \
+  F(findfunctab, FindfunctabSym, 0) \
+  F(minpc, FirstBF, 0) \
+  F(text, FirstBF, 0) \
+  F(maxpc, LastBF, 0) \
+  F(etext, LastBF, 0)
+
+#define F(Field, Symbol, Addend) \
+  { \
+    uint64_t FieldOffset = getOffset(Field); \
+    RemoveRelaReloc(BC, Section, FieldOffset); \
+    AddRelaReloc(BC, Symbol->getSymbol(), Section, FieldOffset, Addend); \
+  }
+    FirstmoduleFields
+#undef F
+#undef FirstmoduleFields
+
+    // Fix firstmoduledata static fields
+    MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0);
+#define FirstmoduleFields \
+  F(pclntable.Count[0]) \
+  F(pclntable.Count[1]) \
+  F(ftab.Count[0]) \
+  F(ftab.Count[1]) \
+  F(filetab.Count[0]) \
+  F(filetab.Count[1])
+
+#define F(Field) \
+  { \
+    uint64_t FieldOffset = getOffset(Field); \
+    uint64_t FieldVal = getValue(Field); \
+    Section->addRelocation(FieldOffset, ZeroSym, Relocation::getAbs(Psize), \
+                           FieldVal); \
+  }
+    FirstmoduleFields
+#undef F
+#undef FirstmoduleFields
+
+#undef getValue
+#undef getOffset
+    return 0;
+  }
+};
+
+class Pclntab_v1_14_12 : public Pclntab_v1_14_9 {
+public:
+  ~Pclntab_v1_14_12() = default;
+};
+
+struct GoFunc_v1_14_12 : GoFunc_v1_14_9 {
+  ~GoFunc_v1_14_12() = default;
+};
+
+struct Module_v1_14_12 : Module_v1_14_9 {
+  ~Module_v1_14_12() = default;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
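Module::patch() above re-expresses firstmoduledata fields as relocations:
fields that must follow a moved symbol get a static ABS relocation plus, for
non-fixed-load-address binaries, a dynamic RELATIVE one (AddRelaReloc), while
constants are re-emitted as ABS relocations against a dummy "Zero" symbol with
the value as addend. A condensed sketch of that idiom; `FieldOffset`, `Sym`
and `Val` are hypothetical stand-ins:

  // Field tracking a symbol that BOLT may move:
  RemoveRelaReloc(BC, Section, FieldOffset);      // drop the stale pair
  AddRelaReloc(BC, Sym, Section, FieldOffset, 0); // re-add against the symbol
  // Plain constant (e.g. a table size): ABS relocation against "Zero",
  // encoding the constant purely in the addend.
  Section->addRelocation(FieldOffset, ZeroSym, Relocation::getAbs(Psize), Val);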
diff --git a/bolt/include/bolt/Passes/Golang/go_v1_16_5.h b/bolt/include/bolt/Passes/Golang/go_v1_16_5.h
new file mode 100644
--- /dev/null
+++ b/bolt/include/bolt/Passes/Golang/go_v1_16_5.h
@@ -0,0 +1,518 @@
+//===--------- Passes/Golang/go_v1_16_5.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_BOLT_GOLANG_V1_16_5_H
+#define LLVM_TOOLS_LLVM_BOLT_GOLANG_V1_16_5_H
+
+#include "go_base.h"
+
+namespace llvm {
+namespace bolt {
+
+class Pclntab_v1_16_5 : public Pclntab {
+#define PcHeaderFields \
+  F(uint32_t, false, Magic) \
+  F(uint8_t, false, Zero1) \
+  F(uint8_t, false, Zero2) \
+  F(uint8_t, false, MinLC) \
+  F(uint8_t, false, PtrSize) \
+  F(uint64_t, true, Nfuncs) \
+  F(uint64_t, true, Nfiles) \
+  F(uint64_t, true, FuncnameOffset) \
+  F(uint64_t, true, CuOffset) \
+  F(uint64_t, true, FiletabOffset) \
+  F(uint64_t, true, PctabOffset) \
+  F(uint64_t, true, PclnOffset)
+
+  struct PcHeader {
+#define F(Type, IsPointerSize, Field) Type Field;
+    PcHeaderFields
+#undef F
+  } Header;
+
+  void __readHeader(BinaryContext &BC, DataExtractor &DE) override {
+    uint64_t Offset = getPclntabHeaderOffset();
+#define F(Type, IsPointer, Field) \
+  { \
+    assert(DE.isValidOffset(Offset) && "Invalid offset"); \
+    size_t FSize = \
+        IsPointer ? BC.AsmInfo->getCodePointerSize() : sizeof(Type); \
+    Header.Field = readEndianVal(DE, &Offset, FSize); \
+  }
+    PcHeaderFields
+#undef F
+  }
+
+  void __writeHeader(BinaryContext &BC, uint8_t *Pclntab) const override {
+#define F(Type, IsPointer, Field) \
+  { \
+    size_t FSize = \
+        IsPointer ? BC.AsmInfo->getCodePointerSize() : sizeof(Type); \
+    writeEndianVal(BC, Header.Field, FSize, &Pclntab); \
+  }
+    PcHeaderFields
+#undef F
+  }
+
+  void setNewHeaderOffsets() override {
+    Header.FuncnameOffset = 0;
+    Header.CuOffset = 0;
+    Header.FiletabOffset = 0;
+    Header.PctabOffset = 0;
+    Header.PclnOffset = getPcHeaderSize();
+  }
+
+  bool checkMagic() const override { return Header.Magic == 0xfffffffa; }
+
+public:
+  ~Pclntab_v1_16_5() = default;
+
+  static size_t getPcHeaderSize(unsigned Psize) {
+    size_t FuncSize = 0;
+#define F(Type, IsPointerSize, Field) \
+  if (IsPointerSize) \
+    FuncSize += Psize; \
+  else \
+    FuncSize += sizeof(Type);
+    PcHeaderFields
+#undef F
+
+    return alignTo(FuncSize, Psize);
+  }
+
+#undef PcHeaderFields
+
+  size_t getPcHeaderSize() const override {
+    return getPcHeaderSize(Header.PtrSize);
+  }
+
+  void setFunctionsCount(uint64_t Count) override { Header.Nfuncs = Count; }
+
+  uint8_t getQuantum() const override { return Header.MinLC; }
+
+  uint8_t getPsize() const override { return Header.PtrSize; }
+
+  uint64_t getFunctionsCount() const override { return Header.Nfuncs; }
+
+  uint64_t getNameOffset() const override {
+    return getPclntabHeaderOffset() + Header.FuncnameOffset;
+  }
+
+  uint64_t getFiletabOffset() const override {
+    return getPclntabHeaderOffset() + Header.FiletabOffset;
+  }
+
+  uint64_t getPctabOffset() const override {
+    return getPclntabHeaderOffset() + Header.PctabOffset;
+  }
+
+  uint64_t getPclntabOffset() const override {
+    return getPclntabHeaderOffset() + Header.PclnOffset;
+  }
+
+  uint64_t getFunctabOffset() const override {
+    return getPclntabHeaderOffset() + Header.PclnOffset;
+  }
+};
+
+struct GoFunc_v1_16_5 : GoFunc {
+  ~GoFunc_v1_16_5() = default;
+
+  // runtime/symtab.go
+  enum {
+    _PCDATA_UnsafePoint = 0,
+    _PCDATA_StackMapIndex = 1,
+    _PCDATA_InlTreeIndex = 2,
+    _PCDATA_MAX,
+    _FUNCDATA_ArgsPointerMaps = 0,
+    _FUNCDATA_LocalsPointerMaps = 1,
+    _FUNCDATA_StackObjects = 2,
+    _FUNCDATA_InlTree = 3,
+    _FUNCDATA_OpenCodedDeferInfo = 4,
+    _FUNCDATA_ArgInfo = 5, // NOTE: gc 1.17
+    _FUNCDATA_MAX,
+    _ArgsSizeUnknown = -0x80000000
+  };
+
+  // runtime/symtab.go
+  enum {
+    _PCDATA_UnsafePointSafe = -1,
+    _PCDATA_UnsafePointUnsafe = -2,
+    _PCDATA_Restart1 = -3,
+    _PCDATA_Restart2 = -4,
+    _PCDATA_RestartAtEntry = -5
+  };
+
+  // runtime/symtab.go
+  enum {
+    funcID_normal = 0,
+    funcID_runtime_main,
+    funcID_goexit,
+    funcID_jmpdefer,
+    funcID_mcall,
+    funcID_morestack,
+    funcID_mstart,
+    funcID_rt0_go,
+    funcID_asmcgocall,
+    funcID_sigpanic,
+    funcID_runfinq,
+    funcID_gcBgMarkWorker,
+    funcID_systemstack_switch,
+    funcID_systemstack,
+    funcID_cgocallback,
+    funcID_gogo,
+    funcID_externalthreadhandler,
+    funcID_debugCallV1,
+    funcID_gopanic,
+    funcID_panicwrap,
+    funcID_handleAsyncEvent,
+    funcID_asyncPreempt,
+    funcID_wrapper
+  };
+
+#define FuncFields \
+  F(uint64_t, true, Entry) \
+  F(int32_t, false, Name) \
+  F(int32_t, false, Args) \
+  F(uint32_t, false, Deferreturn) \
+  F(uint32_t, false, Pcsp) \
+  F(uint32_t, false, Pcfile) \
+  F(uint32_t, false, Pcln) \
+  F(uint32_t, false, Npcdata) \
+  F(uint32_t, false, CuOffset) \
+  F(uint8_t, false, FuncID) \
+  F(uint8_t, false, Flag) \
+  F(uint8_t, false, Unused2) \
+  F(uint8_t, false, Nfuncdata)
+
+  struct _Func {
+#define F(Type, IsPointer, Field) Type Field;
+    FuncFields
+#undef F
+  } __GoFunc;
+
+  uint32_t Pcdata[_PCDATA_MAX] = {};
+  uint64_t Funcdata[_FUNCDATA_MAX] = {};
+
+  int32_t PcdataMax[_PCDATA_MAX] = {};
+
+  void __read(BinaryContext &BC, DataExtractor &DE, BinarySection *Section,
+              uint64_t *FuncOffset) override {
+#define F(Type, IsPointer, Field) \
+  { \
+    assert(DE.isValidOffset(*FuncOffset) && "Invalid offset"); \
+    size_t FSize = \
+        IsPointer ? BC.AsmInfo->getCodePointerSize() : sizeof(Type); \
+    if (IsPointer && Section) \
+      RemoveRelaReloc(BC, Section, *FuncOffset); \
+    __GoFunc.Field = readEndianVal(DE, FuncOffset, FSize); \
+  }
+    FuncFields
+#undef F
+  }
+
+  void __write(BinaryFunction *BF, uint8_t **FuncPart, uint8_t *SectionData,
+               BinarySection *OutputSection) const override {
+    BinaryContext &BC = BF->getBinaryContext();
+#define F(Type, IsPointer, Field) \
+  { \
+    if (IsPointer) { \
+      uint64_t Delta = (uint64_t)(*FuncPart - SectionData); \
+      AddRelaReloc(BC, BF->getSymbol(), OutputSection, Delta, 0); \
+      *FuncPart += BC.AsmInfo->getCodePointerSize(); \
+    } else { \
+      writeEndianVal(BC, __GoFunc.Field, sizeof(Type), FuncPart); \
+    } \
+  }
+    FuncFields
+#undef F
+  }
+
+  size_t getSize(BinaryContext &BC) const override {
+    size_t FuncSize = 0;
+#define F(Type, IsPointerSize, Field) \
+  if (IsPointerSize) \
+    FuncSize += BC.AsmInfo->getCodePointerSize(); \
+  else \
+    FuncSize += sizeof(Type);
+    FuncFields
+#undef F
+    return FuncSize;
+  }
+
+#undef FuncFields
+
+  void disableMetadata() override {
+    __GoFunc.Pcfile = 0;
+    __GoFunc.Pcln = 0;
+    __GoFunc.CuOffset = 0;
+  }
+
+  int32_t getNameOffset() const override { return __GoFunc.Name; }
+
+  void setNameOffset(int32_t Offset) override { __GoFunc.Name = Offset; }
+
+  uint32_t getDeferreturnOffset() const override {
+    return __GoFunc.Deferreturn;
+  }
+
+  void setDeferreturnOffset(uint32_t Offset) override {
+    __GoFunc.Deferreturn = Offset;
+  }
+
+  uint32_t getPcspOffset() const override { return __GoFunc.Pcsp; }
+
+  void setPcspOffset(uint32_t Offset) override { __GoFunc.Pcsp = Offset; }
+
+  uint32_t getNpcdata() const override { return __GoFunc.Npcdata; }
+
+  void fixNpcdata() override {
+    for (int I = _PCDATA_MAX - 1; I >= 0; --I) {
+      if (Pcdata[I]) {
+        __GoFunc.Npcdata = I + 1;
+        return;
+      }
+    }
+
+    __GoFunc.Npcdata = 0;
+  }
+
+  bool hasReservedID(std::string Name) const override {
+    return __GoFunc.FuncID != funcID_normal &&
+           __GoFunc.FuncID != funcID_wrapper;
+  }
+
+  uint8_t getNfuncdata() const override { return __GoFunc.Nfuncdata; }
+
+  unsigned getPcdataUnsafePointIndex() const override {
+    return _PCDATA_UnsafePoint;
+  }
+
+  unsigned getPcdataStackMapIndex() const override {
+    return _PCDATA_StackMapIndex;
+  }
+
+  unsigned getPcdataInlTreeIndex() const override {
+    return _PCDATA_InlTreeIndex;
+  }
+
+  unsigned getPcdataMaxIndex() const override { return _PCDATA_MAX; }
+
+  size_t getPcdataSize() const override { return sizeof(Pcdata); }
+
+  uint32_t getPcdata(unsigned Index) const override {
+    assert(Index < _PCDATA_MAX && "Invalid index");
+    return Pcdata[Index];
+  }
+
+  void setPcdata(unsigned Index, uint32_t Value) override {
+    assert(Index < _PCDATA_MAX && "Invalid index");
+    Pcdata[Index] = Value;
+  }
+
+  void setPcdataMaxVal(unsigned Index, int32_t Value) override {
+    assert(Index < _PCDATA_MAX && "Invalid index");
+    PcdataMax[Index] = Value;
+  }
+
+  int32_t getPcdataMax(unsigned Index) const override {
+    assert(Index < _PCDATA_MAX && "Invalid index");
+    return PcdataMax[Index];
+  }
+
+  int getPcdataUnsafePointVal() const override {
+    return _PCDATA_UnsafePointUnsafe;
+  }
+
+  unsigned getFuncdataInlTreeIndex() const override {
+    return _FUNCDATA_InlTree;
+  }
+
+  uint64_t getFuncdata(unsigned Index) const override {
+    assert(Index < _FUNCDATA_MAX && "Invalid index");
+    return Funcdata[Index];
+  }
+
+  void setFuncdata(unsigned Index, uint64_t Value) override {
+    assert(Index < _FUNCDATA_MAX && "Invalid index");
+    Funcdata[Index] = Value;
+  }
+};
+
+struct Module_v1_16_5 : Module {
+  ~Module_v1_16_5() = default;
+
+  union ModuleStruct {
+    struct {
+      uint64_t pcHeader;
+
+      // Function names part
+      GoArray funcnametab;
+
+      // Compilation Unit indexes part
+      GoArray cutab;
+
+      // Source file names part
+      GoArray filetab;
+
+      // Functions pc-relative part
+      GoArray pctab;
+
+      // Function - ftab offset table part
+      GoArray pclntable;
+
+      // Functions table part
+      GoArray ftab;
+
+      uint64_t findfunctab;
+      uint64_t minpc, maxpc;
+      uint64_t text, etext;
+      uint64_t noptrdata, enoptrdata;
+      uint64_t data, edata;
+      uint64_t bss, ebss;
+      uint64_t noptrbss, enoptrbss;
+      uint64_t end, gcdata, gcbss;
+      uint64_t types, etypes;
+
+      GoArray textsectmap;
+
+      GoArray typelinks;
+
+      GoArray itablinks;
+
+      // Other fields are zeroed/unused in exec
+    } m;
+
+    uint64_t a[sizeof(m) / sizeof(uint64_t)];
+  } ModuleStruct;
+
+  uint64_t getFieldOffset(BinaryContext &BC, uint64_t *Addr) const override {
+    unsigned Psize = BC.AsmInfo->getCodePointerSize();
+    return (Addr - ModuleStruct.a) * Psize;
+  }
+
+  uint64_t *getModule() override { return ModuleStruct.a; }
+
+  size_t getModuleSize() const override { return sizeof(ModuleStruct.m); }
+
+  void setPclntabSize(uint64_t Size) override {
+    // Set funcnametab size
+    ModuleStruct.m.funcnametab.setCount(Size);
+
+    // Set pctab size
+    ModuleStruct.m.pctab.setCount(Size);
+
+    // Set pclntable size
+    ModuleStruct.m.pclntable.setCount(Size);
+  }
+
+  void setFtabSize(uint64_t Count) override {
+    // Fix ftab size; the last entry is reserved for maxpc
+    ModuleStruct.m.ftab.setCount(Count + 1);
+  }
+
+  uint64_t getPcHeaderAddr() const override { return ModuleStruct.m.pcHeader; }
+
+  const GoArray &getFtab() const override { return ModuleStruct.m.ftab; }
+
+  uint64_t getFindfunctab() const override {
+    return ModuleStruct.m.findfunctab;
+  }
+
+  uint64_t getTypes() const override { return ModuleStruct.m.types; }
+
+  uint64_t getEtypes() const override { return ModuleStruct.m.etypes; }
+
+  const GoArray &getTextsectmap() const override {
+    return ModuleStruct.m.textsectmap;
+  }
+
+  const GoArray &getTypelinks() const override {
+    return ModuleStruct.m.typelinks;
+  }
+
+  int patch(BinaryContext &BC) override {
+    BinaryData *Module = getModuleBD(BC);
+    if (!Module) {
+      errs() << "BOLT-ERROR: Failed to get firstmoduledata symbol!\n";
+      return -1;
+    }
+
+    BinarySection *Section = &Module->getSection();
+    std::vector<BinaryFunction *> BFs = BC.getSortedFunctions();
+    unsigned Psize = BC.AsmInfo->getCodePointerSize();
+
+#define getOffset(Field) \
+  (Module->getOffset() + getFieldOffset(BC, &ModuleStruct.m.Field))
+
+#define getValue(Field) (ModuleStruct.m.Field)
+
+    // Fix firstmoduledata pointers
+    BinaryData *PclntabSym = BC.getBinaryDataAtAddress(getPcHeaderAddr());
+    assert(PclntabSym && "PclntabSym absent");
+    BinaryData *FindfunctabSym = BC.getBinaryDataAtAddress(getFindfunctab());
+    assert(FindfunctabSym && "FindfunctabSym absent");
+    BinaryFunction *FirstBF = getFirstBF(BC, BFs);
+    assert(FirstBF && "Text BF absent");
+    BinaryFunction *LastBF = getLastBF(BC, BFs);
+    assert(LastBF && "Text BF absent");
+
+#define FirstmoduleFields \
+  F(pcHeader, PclntabSym, 0) \
+  F(funcnametab.Address, PclntabSym, 0) \
+  F(pctab.Address, PclntabSym, 0) \
+  F(pclntable.Address, PclntabSym, 0) \
+  F(ftab.Address, PclntabSym, Pclntab_v1_16_5::getPcHeaderSize(Psize)) \
+  F(findfunctab, FindfunctabSym, 0) \
+  F(minpc, FirstBF, 0) \
+  F(text, FirstBF, 0) \
+
+#define F(Field, Symbol, Addend) \
+  { \
+    uint64_t FieldOffset = getOffset(Field); \
+    RemoveRelaReloc(BC, Section, FieldOffset); \
+    AddRelaReloc(BC, Symbol->getSymbol(), Section, FieldOffset, Addend); \
+  }
+    FirstmoduleFields
+#undef F
+#undef FirstmoduleFields
+
+    // Fix firstmoduledata static fields
+    MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0);
+#define FirstmoduleFields \
+  F(funcnametab.Count[0]) \
+  F(funcnametab.Count[1]) \
+  F(pctab.Count[0]) \
+  F(pctab.Count[1]) \
+  F(pclntable.Count[0]) \
+  F(pclntable.Count[1]) \
+  F(ftab.Count[0]) \
+  F(ftab.Count[1])
+
+#define F(Field) \
+  { \
+    uint64_t FieldOffset = getOffset(Field); \
+    uint64_t FieldVal = getValue(Field); \
+    Section->addRelocation(FieldOffset, ZeroSym, Relocation::getAbs(Psize), \
+                           FieldVal); \
+  }
+    FirstmoduleFields
+#undef F
+#undef FirstmoduleFields
+
+#undef getValue
+#undef getOffset
+    return 0;
+  }
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/include/bolt/Passes/Golang/go_v1_17.h b/bolt/include/bolt/Passes/Golang/go_v1_17.h
new file mode 100644
--- /dev/null
+++ b/bolt/include/bolt/Passes/Golang/go_v1_17.h
@@ -0,0 +1,86 @@
+//===--------- Passes/Golang/go_v1_17.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_BOLT_GOLANG_V1_17_H
+#define LLVM_TOOLS_LLVM_BOLT_GOLANG_V1_17_H
+
+#include "go_v1_16_5.h"
+
+namespace llvm {
+namespace bolt {
+
+class Pclntab_v1_17_2 : public Pclntab_v1_16_5 {
+public:
+  ~Pclntab_v1_17_2() = default;
+};
+
+struct GoFunc_v1_17_2 : GoFunc_v1_16_5 {
+  // runtime/symtab.go
+  enum {
+    funcID_normal = 0,
+    funcID_abort,
+    funcID_asmcgocall,
+    funcID_asyncPreempt,
+    funcID_cgocallback,
+    funcID_debugCallV2,
+    funcID_gcBgMarkWorker,
+    funcID_goexit,
+    funcID_gogo,
+    funcID_gopanic,
+    funcID_handleAsyncEvent,
+    funcID_jmpdefer,
+    funcID_mcall,
+    funcID_morestack,
+    funcID_mstart,
+    funcID_panicwrap,
+    funcID_rt0_go,
+    funcID_runfinq,
+    funcID_runtime_main,
+    funcID_sigpanic,
+    funcID_systemstack,
+    funcID_systemstack_switch,
+    funcID_wrapper
+  };
+
+  bool hasReservedID(std::string Name) const override {
+    // NOTE Go 1.17 has a bug with function names containing the '_' symbol.
+    // https://go-review.googlesource.com/c/go/+/396797
+    // The name might end with an .abi0 suffix
+    const char Rt0Go[] = "runtime.rt0_go";
+    const char StackSwitch[] = "runtime.systemstack_switch";
+
+    return (__GoFunc.FuncID != funcID_normal &&
+            __GoFunc.FuncID != funcID_wrapper) ||
+           !strncmp(Name.c_str(), Rt0Go, sizeof(Rt0Go) - 1) ||
+           !strncmp(Name.c_str(), StackSwitch, sizeof(StackSwitch) - 1);
+  }
+
+  ~GoFunc_v1_17_2() = default;
+};
+
+struct Module_v1_17_2 : Module_v1_16_5 {
+  ~Module_v1_17_2() = default;
+};
+
+class Pclntab_v1_17_5 : public Pclntab_v1_17_2 {
+public:
+  ~Pclntab_v1_17_5() = default;
+};
+
+struct GoFunc_v1_17_5 : GoFunc_v1_17_2 {
+  ~GoFunc_v1_17_5() = default;
+};
+
+struct Module_v1_17_5 : Module_v1_17_2 {
+  ~Module_v1_17_5() = default;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/include/bolt/Passes/IdenticalCodeFolding.h b/bolt/include/bolt/Passes/IdenticalCodeFolding.h
--- a/bolt/include/bolt/Passes/IdenticalCodeFolding.h
+++ b/bolt/include/bolt/Passes/IdenticalCodeFolding.h
@@ -20,15 +20,7 @@
 ///
 class IdenticalCodeFolding : public BinaryFunctionPass {
 protected:
-  bool shouldOptimize(const BinaryFunction &BF) const override {
-    if (BF.hasUnknownControlFlow())
-      return false;
-    if (BF.isFolded())
-      return false;
-    if (BF.hasSDTMarker())
-      return false;
-    return BinaryFunctionPass::shouldOptimize(BF);
-  }
+  bool shouldOptimizeICF(const BinaryFunction &BF) const;
 
 public:
   explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass)
diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h
--- a/bolt/include/bolt/Passes/LongJmp.h
+++ b/bolt/include/bolt/Passes/LongJmp.h
@@ -126,10 +126,22 @@
   /// Helper to identify whether \p Inst is branching to a stub
   bool usesStub(const BinaryFunction &Func, const MCInst &Inst) const;
 
+  /// Helper functions to return the delta offset between \p DotAddress and
+  /// the target
+  uint64_t getTargetOffset(const BinaryContext &BC, uint64_t InstSize,
+                           uint64_t TargetAddress, uint64_t DotAddress) const;
+  uint64_t getTargetOffset(const BinaryFunction &Func, uint64_t InstSize,
+                           const MCSymbol *TgtSym, uint64_t DotAddress) const;
+  uint64_t getTargetOffset(const BinaryFunction &Func, const MCInst &Inst,
+                           uint64_t DotAddress) const;
+
   /// True if Inst is a branch that is out of range
   bool needsStub(const BinaryBasicBlock &BB, const MCInst &Inst,
                  uint64_t DotAddress) const;
 
+  /// Relax instruction \p II in \p BB in-place if necessary
+  bool relaxInstruction(BinaryBasicBlock &BB, BinaryBasicBlock::iterator &II,
+                        uint64_t DotAddress);
+
   /// Expand the range of the stub in StubBB if necessary
   bool relaxStub(BinaryBasicBlock &StubBB);
 
@@ -137,8 +149,14 @@
   uint64_t getSymbolAddress(const BinaryContext &BC, const MCSymbol *Target,
                             const BinaryBasicBlock *TgtBB) const;
 
+  enum RelaxRet {
+    NotModified = 0,  // No changes were made
+    InstrRelaxed = 1, // Instruction relaxations were made
+    StubsInserted = 2 // New stubs were inserted in the function
+  };
+
   /// Relax function by adding necessary stubs or relaxing existing stubs
-  bool relax(BinaryFunction &BF);
+  RelaxRet relax(BinaryFunction &BF);
 
 public:
   /// BinaryPass public interface
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -453,6 +453,9 @@
   uint64_t NewTextSegmentOffset{0};
   uint64_t NewTextSegmentSize{0};
 
+  uint64_t NewExtraSegmentAddress{0};
+  uint64_t NewExtraSegmentSize{0};
+
   /// Track next available address for new allocatable sections.
 uint64_t NextAvailableAddress{0};
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -54,6 +54,20 @@
 extern llvm::cl::opt<bool> UseOldText;
 extern llvm::cl::opt<bool> UpdateDebugSections;
 
+enum GolangVersion : char {
+  GV_NONE = 0,            /// Do not use golang passes
+  GV_FIRST,
+  GV_AUTO = GV_FIRST,     /// Auto-detect golang version
+  GV_1_14_9,              /// Version 1.14.9
+  GV_1_14_12,             /// Version 1.14.12
+  GV_1_16_5,              /// Version 1.16.5
+  GV_1_17_2,              /// Version 1.17.2
+  GV_1_17_5,              /// Version 1.17.5
+  GV_LAST,
+  GV_LATEST = GV_LAST - 1 /// Latest Go version
+};
+
+extern llvm::cl::opt<GolangVersion> GolangPass;
 // The default verbosity level (0) is pretty terse, level 1 is fairly
 // verbose and usually prints some informational message for every
 // function processed.  Level 2 is for the noisiest of messages and
diff --git a/bolt/include/bolt/Utils/NameResolver.h b/bolt/include/bolt/Utils/NameResolver.h
--- a/bolt/include/bolt/Utils/NameResolver.h
+++ b/bolt/include/bolt/Utils/NameResolver.h
@@ -29,9 +29,13 @@
 public:
   /// Return unique version of the \p Name in the form "Name<Sep><ID>".
+  static std::string uniquifyID(StringRef Name, uint64_t ID) {
+    return (Name + Twine(Sep) + Twine(ID)).str();
+  }
+
   std::string uniquify(StringRef Name) {
     const uint64_t ID = ++Counters[Name];
-    return (Name + Twine(Sep) + Twine(ID)).str();
+    return uniquifyID(Name, ID);
   }
 
   /// For uniquified \p Name, return the original form (that may no longer be
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -646,5 +646,22 @@
   LocSyms.reset(nullptr);
 }
 
+BinaryBasicBlock::iterator
+BinaryBasicBlock::storeInstructionMetadata(BinaryBasicBlock::iterator II) {
+  // If the instruction has the "Locked" annotation, save all of its
+  // annotations to a NOP instruction and insert it in place of the
+  // erased one.
+  BinaryContext &BC = Function->getBinaryContext();
+  if (BC.MIB->hasAnnotation(*II, "Locked")) {
+    MCInst Noop;
+    BC.MIB->createNoop(Noop);
+    std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
+    BC.MIB->copyAnnotationInst(*II, Noop);
+    return insertInstruction(II, Noop) + 1;
+  }
+
+  return II;
+}
+
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1777,6 +1777,17 @@
                         new BinarySection(*this, Name, Data, Size, Alignment,
                                           ELFType, ELFFlags));
 }
 
+BinarySection &BinaryContext::registerExtraSection(StringRef Name,
+                                                   unsigned ELFType,
+                                                   unsigned ELFFlags,
+                                                   uint8_t *Data, uint64_t Size,
+                                                   unsigned Alignment) {
+  BinarySection &Section =
+      registerOrUpdateSection(Name, ELFType, ELFFlags, Data, Size, Alignment);
+  ExtraSectionsNames.emplace_back(Section.getName());
+  return Section;
+}
+
 bool BinaryContext::deregisterSection(BinarySection &Section) {
   BinarySection *SectionPtr = &Section;
   auto Itr = Sections.find(SectionPtr);
@@ -2132,5 +2143,18 @@
   return OutputRanges;
 }
 
+std::pair<unsigned, unsigned>
+BinaryContext::getBFAlignment(BinaryFunction &Function,
+                              bool EmitColdPart) const {
+  unsigned Alignment = Function.getAlignment();
+  if (HasRelocations) {
+    unsigned MaxAlignBytes = EmitColdPart ? Function.getMaxColdAlignmentBytes()
+                                          : Function.getMaxAlignmentBytes();
+    return std::make_pair(Alignment, MaxAlignBytes);
+  }
+
+  return std::make_pair(Alignment, Alignment);
+}
+
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -425,9 +425,10 @@
   }
 
   // Check if special alignment for macro-fusion is needed.
-  bool MayNeedMacroFusionAlignment =
-      (opts::AlignMacroOpFusion == MFT_ALL) ||
-      (opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
+  bool MayNeedMacroFusionAlignment = ((opts::AlignMacroOpFusion == MFT_ALL) ||
+                                      (opts::AlignMacroOpFusion == MFT_HOT &&
+                                       BB->getKnownExecutionCount())) &&
+                                     opts::GolangPass == opts::GV_NONE;
   BinaryBasicBlock::const_iterator MacroFusionPair;
   if (MayNeedMacroFusionAlignment) {
     MacroFusionPair = BB->getMacroOpFusionPair();
@@ -1143,6 +1144,14 @@
     Section.emitAsData(Streamer, EmitName);
     Section.clearRelocations();
   }
+
+  for (auto &SectionName : BC.ExtraSectionsNames) {
+    ErrorOr<BinarySection &> Section = BC.getUniqueSectionByName(SectionName);
+    if (!Section)
+      continue;
+    Section->emitAsData(Streamer, SectionName);
+    Section->clearRelocations();
+  }
 }
 
 namespace llvm {
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Core/BinaryBasicBlock.h"
 #include "bolt/Core/DynoStats.h"
 #include "bolt/Core/MCPlusBuilder.h"
+#include "bolt/Passes/Golang.h"
 #include "bolt/Utils/NameResolver.h"
 #include "bolt/Utils/NameShortener.h"
 #include "bolt/Utils/Utils.h"
@@ -320,6 +321,14 @@
         BB->markValid(true);
         break;
       }
+
+      // NOTE GO: deferreturn calls are located in unreachable regions
+      if (opts::GolangPass != opts::GV_NONE && BC.MIB->isCall(Inst) &&
+          BC.MIB->hasAnnotation(Inst, "IsDefer")) {
+        Stack.push(BB);
+        BB->markValid(true);
+        break;
+      }
     }
   }
 
@@ -1209,14 +1218,7 @@
       continue;
     }
 
-    if (!BC.DisAsm->getInstruction(Instruction, Size,
-                                   FunctionData.slice(Offset),
-                                   AbsoluteInstrAddr, nulls())) {
-      // Functions with "soft" boundaries, e.g. coming from assembly source,
-      // can have 0-byte padding at the end.
-      if (isZeroPaddingAt(Offset))
-        break;
-
+    auto disasmFailed = [&]() {
      errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
             << Twine::utohexstr(Offset) << " (address 0x"
            << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << *this
@@ -1228,8 +1230,33 @@
      } else {
        setIgnored();
      }
+    };
 
-      break;
+    if (!BC.DisAsm->getInstruction(Instruction, Size,
+                                   FunctionData.slice(Offset),
+                                   AbsoluteInstrAddr, nulls())) {
+      if (opts::GolangPass != opts::GV_NONE && BC.isAArch64()) {
+        // Golang uses a special UND instruction for aarch64; handle it as a
+        // NOP. Also skipPleaseUseCallersFrames is filled with zeros; replace
+        // them with NOPs as well.
+        DataExtractor DE =
+            DataExtractor(FunctionData, BC.AsmInfo->isLittleEndian(),
+                          BC.AsmInfo->getCodePointerSize());
+        uint64_t ValOffset = Offset;
+        uint32_t Value = DE.getU32(&ValOffset);
+        if (Value && Value != GolangPass::getUndAarch64()) {
+          disasmFailed();
+          break;
+        }
+
+        BC.MIB->createNoop(Instruction);
+      } else if (isZeroPaddingAt(Offset)) {
+        // Functions with "soft" boundaries, e.g. coming from assembly source,
+        // can have 0-byte padding at the end.
+        break;
+      } else {
+        disasmFailed();
+        break;
+      }
    }
 
    // Check integrity of LLVM assembler/disassembler.
@@ -2027,7 +2054,7 @@
       MIB->setOffset(Instr, static_cast<uint32_t>(Offset), AllocatorId);
       if (IsSDTMarker || IsLKMarker)
         HasSDTMarker = true;
-      else
+      else if (!PreserveNops)
         // Annotate ordinary nops, so we can safely delete them if required.
         MIB->addAnnotation(Instr, "NOP", static_cast<uint32_t>(1), AllocatorId);
 }
@@ -2222,7 +2249,8 @@
   // Remove "Offset" annotations, unless we need an address-translation table
   // later. This has no cost, since annotations are allocated by a bumpptr
   // allocator and won't be released anyway until late in the pipeline.
-  if (!requiresAddressTranslation() && !opts::Instrument) {
+  if (!requiresAddressTranslation() && !opts::Instrument &&
+      opts::GolangPass == opts::GV_NONE) {
     for (BinaryBasicBlock *BB : layout())
       for (MCInst &Inst : *BB)
         BC.MIB->clearOffset(Inst);
diff --git a/bolt/lib/Core/BinarySection.cpp b/bolt/lib/Core/BinarySection.cpp
--- a/bolt/lib/Core/BinarySection.cpp
+++ b/bolt/lib/Core/BinarySection.cpp
@@ -16,6 +16,8 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/CommandLine.h"
 
+#include <sys/mman.h>
+
 #define DEBUG_TYPE "bolt"
 
 using namespace llvm;
@@ -67,7 +69,7 @@
 void BinarySection::emitAsData(MCStreamer &Streamer, StringRef NewName) const {
   StringRef SectionName = !NewName.empty() ? NewName : getName();
-  StringRef SectionContents = getContents();
+  StringRef SectionContents = isChanged() ? getOutputContents() : getContents();
   MCSectionELF *ELFSection =
       BC.Ctx->getELFSection(SectionName, getELFType(), getELFFlags());
@@ -162,6 +164,11 @@
 }
 
 BinarySection::~BinarySection() {
+  if (isChanged()) {
+    munmap(getOutputData(), getOutputSize());
+    return;
+  }
+
   if (isReordered()) {
     delete[] getData();
     return;
diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp
--- a/bolt/lib/Core/Relocation.cpp
+++ b/bolt/lib/Core/Relocation.cpp
@@ -606,6 +606,34 @@
   return isPCRelativeX86(Type);
 }
 
+uint64_t Relocation::getAbs32() {
+  if (Arch == Triple::aarch64)
+    return ELF::R_AARCH64_ABS32;
+  return ELF::R_X86_64_32;
+}
+
+uint64_t Relocation::getAbs64() {
+  if (Arch == Triple::aarch64)
+    return ELF::R_AARCH64_ABS64;
+  return ELF::R_X86_64_64;
+}
+
+uint64_t Relocation::getAbs(uint8_t Size) {
+  if (Size == sizeof(uint64_t))
+    return Relocation::getAbs64();
+  else if (Size == sizeof(uint32_t))
+    return Relocation::getAbs32();
+
+  llvm_unreachable("Wrong relocation size");
+  return 0;
+}
+
+uint64_t Relocation::getRelative() {
+  if (Arch == Triple::aarch64)
+    return ELF::R_AARCH64_RELATIVE;
+  return ELF::R_X86_64_RELATIVE;
+}
+
 size_t Relocation::emit(MCStreamer *Streamer) const {
   const size_t Size = getSizeForType(Type);
   MCContext &Ctx = Streamer->getContext();
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -105,7 +105,7 @@
     cl::Hidden,
     cl::cat(BoltOptCategory));
 
-static cl::list<Peepholes::PeepholeOpts> Peepholes(
+cl::list<Peepholes::PeepholeOpts> Peepholes(
    "peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"),
    cl::value_desc("opt1,opt2,opt3,..."),
    cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"),
@@ -1215,9 +1215,13 @@
   if (!BC.HasRelocations)
     return;
 
-  const bool UseColdSection =
-      BC.NumProfiledFuncs > 0 ||
-      opts::ReorderFunctions == ReorderFunctions::RT_USER;
+  bool UseColdSection = BC.NumProfiledFuncs > 0 ||
+                        opts::ReorderFunctions == ReorderFunctions::RT_USER;
+
+  // With RT_NONE reordering we won't assign indexes for BFs
+  if (opts::ReorderFunctions == ReorderFunctions::RT_NONE)
+    UseColdSection = false;
+
   for (auto &BFI : BC.getBinaryFunctions()) {
     BinaryFunction &Function = BFI.second;
     if (opts::isHotTextMover(Function)) {
@@ -1227,7 +1231,8 @@
     }
 
     if (!UseColdSection || Function.hasValidIndex() ||
-        Function.hasValidProfile())
+        (opts::ReorderFunctions == ReorderFunctions::RT_NONE &&
+         Function.hasProfile()))
       Function.setCodeSectionName(BC.getMainCodeSectionName());
     else
       Function.setCodeSectionName(BC.getColdCodeSectionName());
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -14,6 +14,9 @@
   ExtTSPReorderAlgorithm.cpp
   FrameAnalysis.cpp
   FrameOptimizer.cpp
+  Golang.cpp
+  Golang-postprocess.cpp
+  Golang-preprocess.cpp
   HFSort.cpp
   HFSortPlus.cpp
   IdenticalCodeFolding.cpp
diff --git a/bolt/lib/Passes/Golang-postprocess.cpp b/bolt/lib/Passes/Golang-postprocess.cpp
new file mode 100644
--- /dev/null
+++ b/bolt/lib/Passes/Golang-postprocess.cpp
@@ -0,0 +1,228 @@
+//===--------- Passes/Golang-postprocess.h---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Passes/Golang.h"
+
+#define DEBUG_TYPE "bolt-golang-postprocess"
+
+using namespace llvm;
+
+namespace opts {
+extern cl::opt<bool> Instrument;
+extern cl::opt<bool> GolangPcspPreserve;
+} // end namespace opts
+
+namespace llvm {
+namespace bolt {
+
+void GolangPostPass::skipPleaseUseCallersFramesPass(BinaryContext &BC) {
+  // The function must have sizeofSkipFunction bytes of nops
+  // runtime/traceback.go
+  const int sizeofSkipFunction = 256;
+  const char *FunctionName = "runtime.skipPleaseUseCallersFrames";
+  BinaryFunction *Function = BC.getBinaryFunctionByName(FunctionName);
+  if (!Function) {
+    LLVM_DEBUG(outs() << "BOLT-WARNING: Failed to get " << FunctionName
+                      << "\n");
+    return;
+  }
+
+  assert(Function->layout_begin() != Function->layout_end() &&
+         "Unexpected empty function");
+  BinaryBasicBlock *BB = *Function->layout_begin();
+  BB->clear();
+
+  MCInst Inst;
+  BC.MIB->createNoop(Inst);
+  uint64_t Size = sizeofSkipFunction / BC.computeInstructionSize(Inst);
+  std::unique_ptr<GoFunc> GoFunc = createGoFunc();
+  unsigned PcspIndex = GoFunc->getPcspIndex();
+  while (Size--) {
+    Inst.clear();
+    BC.MIB->createNoop(Inst);
+    BB->insertInstruction(BB->begin(), Inst);
+    addVarintAnnotation(BC, *BB->begin(), PcspIndex, /* Value */ 0,
+                        /*IsNext*/ false);
+  }
+}
+
+void GolangPostPass::instrumentExitCall(BinaryContext &BC) {
+  // Golang does not call anything on process termination;
+  // insert an instrumentation fini call at the exit function.
+  if (!opts::Instrument)
+    return;
+
+  BinaryFunction *Function = BC.getBinaryFunctionByName("runtime.exit");
+  if (!Function) {
+    outs() << "BOLT-WARNING: Failed to get runtime.exit for instrumentation!\n";
+    return;
+  }
+
+  if (!BC.shouldEmit(*Function)) {
+    outs() << "BOLT-WARNING: runtime.exit could not be patched for "
+              "instrumentation!\n";
+    return;
+  }
+
+  assert(Function->layout_begin() != Function->layout_end() &&
+         "runtime.exit is empty");
+  BinaryBasicBlock *BB = *Function->layout_begin();
+  MCSymbol *FiniHandler =
+      BC.Ctx->getOrCreateSymbol("__bolt_trampoline_instr_fini_call");
+  std::vector<MCInst> Instrs = BC.MIB->createInstrumentFiniCall(
+      FiniHandler, &*BC.Ctx, /*IsTailCall*/ false);
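+  // Prepend the fini call to the entry block of runtime.exit so the
+  // instrumentation data is flushed before the process terminates.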
+  BB->insertInstructions(BB->begin(), Instrs);
+}
+
+uint32_t GolangPostPass::pcdataPass(BinaryFunction *BF, GoFunc *GoFunc,
+                                    const uint32_t Index,
+                                    const unsigned AllocId) {
+  int Ret;
+  int32_t Val, NextVal;
+  MCInst NoopInst;
+  BinaryContext &BC = BF->getBinaryContext();
+  for (auto BBIt = BF->layout_begin(); BBIt != BF->layout_end(); ++BBIt) {
+    BinaryBasicBlock *BB = *BBIt;
+    for (uint64_t I = 0; I < BB->size(); ++I) {
+      MCInst &Inst = BB->getInstructionAtIndex(I);
+      bool IsMap = hasVarintAnnotation(BC, Inst, Index);
+      if (!IsMap) {
+        if (Index == GoFunc->getPcdataUnsafePointIndex() &&
+            BC.MIB->hasAnnotation(Inst, "IsInstrumentation")) {
+          // runtime/preempt.go isAsyncSafePoint
+          // NOTE: If the function had no PCDATA unsafe points before,
+          // the whole function will be marked as unsafe for preemption.
+          int Val = GoFunc->getPcdataUnsafePointVal();
+          addVarintAnnotation(BC, Inst, Index, Val, /*IsNext*/ false, AllocId);
+        }
+
+        continue;
+      }
+
+      if (Index == GoFunc->getPcdataStackMapIndex()) {
+        auto addNoop = [&](const int32_t Val, const int32_t NextVal) {
+          BC.MIB->createNoop(NoopInst);
+          addVarintAnnotation(BC, NoopInst, Index, Val, /*IsNext*/ false,
+                              AllocId);
+          addVarintAnnotation(BC, NoopInst, Index, NextVal, /*IsNext*/ true,
+                              AllocId);
+          auto NextIt = std::next(BB->begin(), I + 1);
+          BB->insertInstruction(NextIt, NoopInst);
+        };
+
+        NextVal = getVarintAnnotation(BC, Inst, Index, /*IsNext*/ true);
+        int32_t NextInstVal;
+        Ret = getNextMCinstVal(BBIt, I, Index, NextInstVal, nullptr);
+        if (Ret < 0) {
+          Val = getVarintAnnotation(BC, Inst, Index);
+          // Check that the last instruction value equals the next value
+          if (Val != NextVal)
+            addNoop(NextVal, NextVal);
+
+          return 0;
+        }
+
+        // We need to save the original chain of values for call instructions,
+        // so check that the value of the next instruction is the same as
+        // expected
+        if (NextVal == NextInstVal || !BC.MIB->isCall(Inst))
+          continue;
+
+        // If the value is not the same as expected, create a nop
+        // with the right values
+        addNoop(NextVal, NextInstVal);
+      }
+    }
+  }
+
+  return 0;
+}
+
+int GolangPostPass::pclntabPass(BinaryContext &BC) {
+  const uint64_t PclntabAddr = getPcHeaderAddr();
+  if (!PclntabAddr) {
+    errs() << "BOLT-ERROR: Pclntab address is zero!\n";
+    return -1;
+  }
+
+  BinaryData *PclntabSym = BC.getBinaryDataAtAddress(PclntabAddr);
+  if (!PclntabSym) {
+    errs() << "BOLT-ERROR: Failed to get pclntab symbol!\n";
+    return -1;
+  }
+
+  BinarySection *Section = &PclntabSym->getSection();
+  DataExtractor DE = DataExtractor(Section->getContents(),
+                                   BC.AsmInfo->isLittleEndian(), getPsize());
+
+  ParallelUtilities::WorkFuncWithAllocTy workFun =
+      [&](BinaryFunction &Function, MCPlusBuilder::AllocatorIdTy AllocId) {
+        uint64_t FuncOffset = getPclntab()->getFunctabOffset() +
+                              Function.getGolangFunctabOffset();
+        assert(DE.isValidOffset(FuncOffset) && "Invalid offset");
+        std::unique_ptr<GoFunc> GoFunc = createGoFunc();
+        GoFunc->read(BC, DE, Section, &FuncOffset);
+
+        auto getPcdata = [&](const uint32_t Index, bool Force = false) {
+          uint32_t MapOffsetVal = GoFunc->getPcdata(Index);
+          if (!MapOffsetVal && !Force)
+            return;
+
+          pcdataPass(&Function, GoFunc.get(), Index, AllocId);
+        };
+
+        // Force the pcdata pass to run so async preemption is disabled for
+        // instructions inserted by instrumentation, which otherwise ignore
+        // the stack displacement
+        if (opts::Instrument)
+          getPcdata(GoFunc->getPcdataUnsafePointIndex(), /*Force*/ true);
+        getPcdata(GoFunc->getPcdataStackMapIndex());
+
+        if (Function.layout_begin() != Function.layout_end()) {
+          // Insert a NOP at the end of the function if it ends with a call
+          // instruction, to provide a correct PCSP table later for
+          // runtime.gentraceback. This is needed for rare cases with
+          // no-return calls: pcvalue searches for targetpc < pc, and for tail
+          // calls we would have targetpc == pc. We could also add +1 to the
+          // final PCSP offset, but that would not fix the preserved PCSP
+          // table case, so this solution seems to be more robust.
+          BinaryBasicBlock *BBend = *Function.layout_rbegin();
+          if (BC.MIB->isCall(*BBend->rbegin())) {
+            MCInst NoopInst;
+            BC.MIB->createNoop(NoopInst);
+            BBend->insertInstruction(BBend->end(), NoopInst);
+          }
+        }
+      };
+  ParallelUtilities::PredicateTy skipFunc =
+      [&](const BinaryFunction &Function) { return !Function.isGolang(); };
+
+  ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
+      BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, workFun,
+      skipFunc, "pcdataGoPostProcess", /*ForceSequential*/ true);
+  return 0;
+}
+
+void GolangPostPass::runOnFunctions(BinaryContext &BC) {
+  int Ret;
+
+  skipPleaseUseCallersFramesPass(BC);
+
+  instrumentExitCall(BC);
+
+  Ret = pclntabPass(BC);
+  if (Ret < 0) {
+    errs() << "BOLT-ERROR: Golang postprocess pclntab pass failed!\n";
+    exit(1);
+  }
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Passes/Golang-preprocess.cpp b/bolt/lib/Passes/Golang-preprocess.cpp
new file mode 100644
--- /dev/null
+++ b/bolt/lib/Passes/Golang-preprocess.cpp
@@ -0,0 +1,403 @@
+//===--------- Passes/Golang-preprocess.h----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Passes/Golang.h"
+
+#define DEBUG_TYPE "bolt-golang-preprocess"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace opts {
+extern cl::opt<bool> GolangPcspPreserve;
+} // end namespace opts
+
+namespace llvm {
+namespace bolt {
+
+static inline uint32_t computeInstructionSize(BinaryContext &BC, MCInst &II,
+                                              const MCCodeEmitter *Emitter) {
+  if (BC.isAArch64())
+    return 4;
+
+  if (BC.MIB->hasAnnotation(II, "Size")) {
+    constexpr size_t InvalidSize = std::numeric_limits<size_t>::max();
+    uint32_t Size =
+        BC.MIB->getAnnotationWithDefault(II, "Size", InvalidSize);
+    assert(Size != InvalidSize && "Invalid size");
+    return Size;
+  }
+
+  return BC.computeInstructionSize(II, Emitter);
+}
+
+static void inlTreePass(BinaryFunction *Function, GoFunc *GoFunc,
+                        const unsigned AllocId, const MCCodeEmitter *Emitter) {
+  BinaryContext &BC = Function->getBinaryContext();
+  const unsigned PcdataIndex = GoFunc->getPcdataInlTreeIndex();
+  int32_t MaxVal = GoFunc->getPcdataMax(PcdataIndex);
+  if (MaxVal < 0)
+    return;
+
+  const unsigned Index = GoFunc->getFuncdataInlTreeIndex();
+  uint64_t FuncdataAddr = GoFunc->getFuncdata(Index);
+  if (!FuncdataAddr)
+    return;
+
+  struct InlinedCall InlinedCall;
+  ErrorOr<BinarySection &> FuncdataSection =
+      BC.getSectionForAddress(FuncdataAddr);
+  if (!FuncdataSection)
+    return;
+
+  DataExtractor DE = DataExtractor(FuncdataSection->getContents(),
+                                   BC.AsmInfo->isLittleEndian(),
+                                   BC.AsmInfo->getCodePointerSize());
+
+  for (uint32_t I = 0; I < (uint32_t)MaxVal + 1; ++I) {
+    uint64_t Offset = (uint64_t)(FuncdataAddr - FuncdataSection->getAddress());
+    Offset += I * sizeof(InlinedCall);
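+    // Each InlinedCall entry below is read as a fixed-size record
+    // (cf. runtime/symtab.go): parent(int16), funcID(uint8), unused(uint8),
+    // file(int32), line(int32), func(int32), parentPc(int32).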
+    assert(DE.isValidOffset(Offset) && "Invalid offset");
+    InlinedCall.Parent = (int16_t)DE.getU16(&Offset);
+    InlinedCall.FuncID = DE.getU8(&Offset);
+    InlinedCall.Unused = DE.getU8(&Offset);
+    InlinedCall.File = (int32_t)DE.getU32(&Offset);
+    InlinedCall.Line = (int32_t)DE.getU32(&Offset);
+    InlinedCall.Func = (int32_t)DE.getU32(&Offset);
+    InlinedCall.ParentPc = (int32_t)DE.getU32(&Offset);
+
+    for (BinaryBasicBlock *BB : Function->layout()) {
+      if (InlinedCall.ParentPc >= BB->getEndOffset())
+        continue;
+
+      uint32_t Offset = BB->getOffset();
+      for (MCInst &II : *BB) {
+        if (BC.MIB->hasAnnotation(II, "Offset")) {
+          constexpr size_t InvalidOffset = std::numeric_limits<size_t>::max();
+          Offset = BC.MIB->getAnnotationWithDefault(II, "Offset",
+                                                    InvalidOffset);
+          assert(Offset != InvalidOffset && "Invalid offset");
+        }
+
+        if (Offset < InlinedCall.ParentPc) {
+          Offset += computeInstructionSize(BC, II, Emitter);
+          continue;
+        }
+
+        assert(Offset == InlinedCall.ParentPc && "Offset overflow");
+
+        // NOTE Annotations must not be created in concurrent threads
+        static std::atomic_flag Lock = ATOMIC_FLAG_INIT;
+        while (Lock.test_and_set(std::memory_order_acquire))
+          ;
+        addFuncdataAnnotation(BC, II, Index, I, AllocId);
+        Lock.clear(std::memory_order_release);
+        // To be able to restore the right inline unwinding later, we lock
+        // the instruction
+        bool &Locked = BC.MIB->getOrCreateAnnotationAs<bool>(II, "Locked");
+        Locked = true;
+        break;
+      }
+
+      break;
+    }
+  }
+}
+
+static uint32_t readVarintPass(BinaryFunction *Function, GoFunc *GoFunc,
+                               DataExtractor &DE, uint64_t *MapOffset,
+                               const uint32_t Index, const uint8_t Quantum,
+                               const unsigned AllocId,
+                               const MCCodeEmitter *Emitter) {
+  BinaryContext &BC = Function->getBinaryContext();
+  int32_t ValSum = -1, MaxVal = -1;
+  uint64_t OldOffsetSum, OffsetSum = 0;
+  bool IsFirst = true;
+  MCInst *PrevII = nullptr;
+
+  do {
+    OldOffsetSum = OffsetSum;
+    int32_t Val = readVarintPair(DE, MapOffset, ValSum, OffsetSum, Quantum);
+    if (!Val && !IsFirst) {
+      if (Index == GoFunc->getPcdataStackMapIndex())
+        addVarintAnnotation(BC, *PrevII, Index, ValSum, /*IsNext*/ true,
+                            AllocId);
+      break;
+    }
+
+    if (ValSum > MaxVal)
+      MaxVal = ValSum;
+
+    for (BinaryBasicBlock *BB : Function->layout()) {
+      if (OldOffsetSum >= BB->getEndOffset())
+        continue;
+
+      uint32_t Offset = BB->getOffset();
+      if (Offset > OffsetSum)
+        break;
+
+      for (MCInst &II : *BB) {
+        if (BC.MIB->hasAnnotation(II, "Offset")) {
+          constexpr size_t InvalidOffset = std::numeric_limits<size_t>::max();
+          Offset = BC.MIB->getAnnotationWithDefault(II, "Offset",
+                                                    InvalidOffset);
+          assert(Offset != InvalidOffset && "Invalid offset");
+        }
+
+        if (Offset < OldOffsetSum) {
+          Offset += computeInstructionSize(BC, II, Emitter);
+          continue;
+        } else if (Offset == OffsetSum) {
+          break;
+        }
+
+        addVarintAnnotation(BC, II, Index, ValSum, /*IsNext*/ false, AllocId);
+        if (Index == GoFunc->getPcdataStackMapIndex() && PrevII)
+          addVarintAnnotation(BC, *PrevII, Index, ValSum, /*IsNext*/ true,
+                              AllocId);
+
+        PrevII = &II;
+        assert(Offset < OffsetSum && "Offset overflow");
+        Offset += computeInstructionSize(BC, II, Emitter);
+      }
+    }
+
+    IsFirst = false;
+  } while (1);
+
+  return MaxVal;
+}
+
+void GolangPrePass::deferreturnPass(BinaryFunction &BF,
+                                    const uint64_t DeferOffset,
+                                    const unsigned AllocId,
+                                    const MCCodeEmitter *Emitter) {
+  BinaryContext &BC = BF.getBinaryContext();
+  uint64_t Offset = 0;
+  for (auto BB : BF.layout()) {
+    for (auto II = BB->begin(); II != BB->end(); ++II) {
+      if (BC.MIB->hasAnnotation(*II, "Offset")) {
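+        // The "Offset" annotation, when present, holds the instruction's
+        // original input offset; prefer it over the running size estimate.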
+        constexpr auto InvalidOffset = std::numeric_limits<uint64_t>::max();
+        Offset = BC.MIB->getAnnotationWithDefault(*II, "Offset",
+                                                  InvalidOffset);
+        assert(Offset != InvalidOffset);
+      }
+
+      if (Offset < DeferOffset) {
+        Offset += computeInstructionSize(BC, *II, Emitter);
+        continue;
+      }
+
+      if (Offset != DeferOffset)
+        break;
+
+      assert(BC.MIB->isCall(*II));
+      BC.MIB->addAnnotation(*II, "IsDefer", true, AllocId);
+      return;
+    }
+  }
+
+  errs() << "BOLT-ERROR: Deferreturn call was not found for " << BF << "\n";
+  exit(1);
+}
+
+int GolangPrePass::pclntabPass(BinaryContext &BC) {
+  const uint64_t PclntabAddr = getPcHeaderAddr();
+  if (!PclntabAddr) {
+    errs() << "BOLT-ERROR: Pclntab address is zero!\n";
+    return -1;
+  }
+
+  BinaryData *PclntabSym = BC.getBinaryDataAtAddress(PclntabAddr);
+  if (!PclntabSym) {
+    errs() << "BOLT-ERROR: Failed to get pclntab symbol!\n";
+    return -1;
+  }
+
+  BinarySection *Section = &PclntabSym->getSection();
+  const class Pclntab *Pclntab = getPclntab();
+  uint64_t Offset = Pclntab->getPclntabOffset();
+  DataExtractor DE = DataExtractor(Section->getContents(),
+                                   BC.AsmInfo->isLittleEndian(), getPsize());
+  for (uint64_t F = 0; F < Pclntab->getFunctionsCount(); ++F) {
+    assert(DE.isValidOffset(Offset) && "Invalid offset");
+    struct Functab Functab;
+    RemoveRelaReloc(BC, Section, Offset);
+    Functab.Address = DE.getAddress(&Offset);
+    Functab.Offset = DE.getAddress(&Offset);
+
+    BinaryFunction *Function = BC.getBinaryFunctionAtAddress(Functab.Address);
+    if (!Function) {
+      outs() << "Failed to find function by address "
+             << Twine::utohexstr(Functab.Address) << "\n";
+      return -1;
+    }
+
+    Function->setGolangFunctabOffset(Functab.Offset);
+  }
+
+  // Remove the maxpc relocation (the last pclntab entry)
+  RemoveRelaReloc(BC, Section, Offset);
+
+  ParallelUtilities::WorkFuncWithAllocTy workFun =
+      [&](BinaryFunction &Function, MCPlusBuilder::AllocatorIdTy AllocId) {
+        if (Function.layout_begin() == Function.layout_end())
+          return;
+
+        BinaryContext::IndependentCodeEmitter Emitter;
+        if (!opts::NoThreads) {
+          Emitter =
+              Function.getBinaryContext().createIndependentMCCodeEmitter();
+        }
+
+        uint64_t FuncOffset =
+            Pclntab->getFunctabOffset() + Function.getGolangFunctabOffset();
+        std::unique_ptr<GoFunc> GoFunc = createGoFunc();
+        GoFunc->read(BC, DE, Section, &FuncOffset);
+        if (GoFunc->hasReservedID(Function.getDemangledName())) {
+          // Functions with a reserved ID are special functions,
+          // mostly written in asm, that are dangerous to change
+          Function.setSimple(false);
+        }
+
+        auto getPcdata = [&](const uint32_t Index) {
+          int32_t Max = -1;
+          uint32_t MapOffsetVal = GoFunc->getPcdata(Index);
+          if (MapOffsetVal) {
+            uint64_t MapOffset = Pclntab->getPctabOffset() + MapOffsetVal;
+            Max = readVarintPass(&Function, GoFunc.get(), DE, &MapOffset, Index,
+                                 Pclntab->getQuantum(), AllocId,
+                                 Emitter.MCE.get());
+          }
+
+          GoFunc->setPcdataMaxVal(Index, Max);
+        };
+
+        getPcdata(GoFunc->getPcdataUnsafePointIndex());
+        getPcdata(GoFunc->getPcdataStackMapIndex());
+        getPcdata(GoFunc->getPcdataInlTreeIndex());
+
+        uint64_t DeferOffset = GoFunc->getDeferreturnOffset();
+        if (DeferOffset)
+          deferreturnPass(Function, DeferOffset, AllocId, Emitter.MCE.get());
+
+        // If the function does not have a stack map index varint,
+        // it was probably written in asm
+        if (GoFunc->getPcdataMax(GoFunc->getPcdataStackMapIndex()) == -1)
+          Function.setIsAsm(true);
+
+        // ASM functions might use the system stack, and we won't be able to
+        // detect that the stack was switched.
+        // TODO For functions with deferreturn calls we preserve the table,
+        // since the BB is unreachable and we are currently unable to
+        // calculate the stack offset.
+        if (GoFunc->getPcspOffset() &&
+            (Function.isAsm() || DeferOffset || opts::GolangPcspPreserve)) {
+          uint64_t Offset = Pclntab->getPctabOffset() + GoFunc->getPcspOffset();
+          readVarintPass(&Function, GoFunc.get(), DE, &Offset,
+                         GoFunc->getPcspIndex(), Pclntab->getQuantum(), AllocId,
+                         Emitter.MCE.get());
+        }
+
+        {
+          // Remove funcdata relocations
+          uint32_t Foffset = GoFunc->getFuncdataOffset();
+          for (int I = 0; I < GoFunc->getNfuncdata(); ++I) {
+            RemoveRelaReloc(BC, Section, Foffset);
+            Foffset += getPsize();
+          }
+        }
+
+        inlTreePass(&Function, GoFunc.get(), AllocId, Emitter.MCE.get());
+      };
+
+  ParallelUtilities::PredicateTy skipFunc =
+      [&](const BinaryFunction &Function) { return !Function.isGolang(); };
+
+  ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
+      BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, workFun,
+      skipFunc, "pcdataGoPreProcess", /*ForceSequential*/ true);
+
+  return 0;
+}
+
+void GolangPrePass::goPassInit(BinaryContext &BC) {
+  // NOTE Currently we don't support PCSP table restoration for AARCH64:
+  // there are many ways to increment/decrement stack values, and the stack
+  // value is often changed through other registers, so we would need to
+  // track all registers to properly find the stack movement values.
+  if (BC.isAArch64() && !opts::GolangPcspPreserve) {
+    LLVM_DEBUG(
+        dbgs() << "BOLT-INFO: Enabling GolangPcspPreserve for AARCH64!\n");
+    opts::GolangPcspPreserve = true;
+  }
+
+  BC.MIB->getOrCreateAnnotationIndex("IsDefer");
+
+  // Initialize annotation indexes for multi-thread access
+  std::unique_ptr<GoFunc> GoFunc = createGoFunc();
+  auto initAnnotation = [&](const unsigned Index) {
+    BC.MIB->getOrCreateAnnotationIndex(getVarintName(Index));
+    BC.MIB->getOrCreateAnnotationIndex(getVarintName(Index, /*IsNext*/ true));
+  };
+
+  initAnnotation(GoFunc->getPcdataUnsafePointIndex());
+  initAnnotation(GoFunc->getPcdataStackMapIndex());
+  initAnnotation(GoFunc->getPcdataInlTreeIndex());
+  initAnnotation(GoFunc->getPcspIndex());
+}
+
+void GolangPrePass::nopPass(BinaryContext &BC) {
+  // Go might generate unreachable jumps, e.g.
+  // https://go-review.googlesource.com/c/go/+/380894/
+  // Removing the nops at a branch destination might affect PCSP table
+  // generation for the code below the nop. Remove the NOP instruction
+  // annotation at the beginning of the basic block in order to preserve the
+  // BB layout for such cases. Shorten multi-byte NOPs before removing the
+  // annotation.
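+  //
+  // A minimal sketch of the case being preserved (hypothetical layout):
+  //   jmp .L1        ; unreachable jump emitted by the Go compiler
+  // .L1:
+  //   nop            ; branch target: kept so PCSP offsets below don't shift
+  //   ...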
+
+  ParallelUtilities::WorkFuncWithAllocTy workFun =
+      [&](BinaryFunction &Function, MCPlusBuilder::AllocatorIdTy AllocId) {
+        for (BinaryBasicBlock *BB : Function.layout()) {
+          MCInst &Inst = BB->front();
+          if (!BC.MIB->isNoop(Inst))
+            continue;
+
+          BC.MIB->shortenInstruction(Inst, *BC.STI);
+          BC.MIB->removeAnnotation(Inst, "NOP");
+          BC.MIB->removeAnnotation(Inst, "Size");
+        }
+      };
+
+  ParallelUtilities::PredicateTy skipFunc =
+      [&](const BinaryFunction &Function) { return !Function.isGolang(); };
+
+  ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
+      BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, workFun,
+      skipFunc, "nopGoPreProcess", /*ForceSequential*/ true);
+}
+
+void GolangPrePass::runOnFunctions(BinaryContext &BC) {
+  int Ret;
+
+  goPassInit(BC);
+
+  Ret = pclntabPass(BC);
+  if (Ret < 0) {
+    errs() << "BOLT-ERROR: Golang preprocess pclntab pass failed!\n";
+    exit(1);
+  }
+
+  nopPass(BC);
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Passes/Golang.cpp b/bolt/lib/Passes/Golang.cpp
new file mode 100644
--- /dev/null
+++ b/bolt/lib/Passes/Golang.cpp
@@ -0,0 +1,1394 @@
+//===--- Golang.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/Golang.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "llvm/Support/EndianStream.h"
+
+#include <queue>
+
+#define DEBUG_TYPE "bolt-golang"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace opts {
+extern cl::OptionCategory BoltOptCategory;
+
+extern cl::opt<bool> Instrument;
+extern cl::opt<bool> NoHugePages;
+extern cl::opt<unsigned> AlignFunctions;
+
+cl::opt<bool>
+    GolangPcspPreserve("golang-preserve-pcsp",
+                       cl::desc("Save pcsp table instead of reconstructing it"),
+                       cl::init(false), cl::ZeroOrMore, cl::Hidden,
+                       cl::cat(BoltOptCategory));
+
+} // end namespace opts
+
+namespace llvm {
+namespace bolt {
+
+#define KINDMASK ((1 << 5) - 1) // runtime/typekind.go
+#define UNCOMMON_FLAG (1 << 0)  // runtime/type.go
+
+// reflect/type.go ; runtime/typekind.go
+enum Kind {
+  Invalid = 0,
+  Bool,
+  Int,
+  Int8,
+  Int16,
+  Int32,
+  Int64,
+  Uint,
+  Uint8,
+  Uint16,
+  Uint32,
+  Uint64,
+  Uintptr,
+  Float32,
+  Float64,
+  Complex64,
+  Complex128,
+  Array,
+  Chan,
+  Func,
+  Interface,
+  Map,
+  Ptr,
+  Slice,
+  String,
+  Struct,
+  UnsafePointer,
+  LastKind
+};
+
+static std::map<BinaryBasicBlock *, uint64_t> BBSizes;
+
+static int updateBBSizes(BinaryContext &BC) {
+  ParallelUtilities::WorkFuncTy workFun = [&](BinaryFunction &BF) {
+    for (BinaryBasicBlock *BB : BF.layout())
+      BBSizes[BB] = BB->estimateSize();
+  };
+
+  ParallelUtilities::runOnEachFunction(
+      BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, workFun, nullptr,
+      "UpdateEstimatedSizes",
+      /*ForceSequential*/ true);
+  return 0;
+}
+
+static uint64_t getBFCodeSize(BinaryFunction *BF) {
+  uint64_t Size = 0;
+  for (BinaryBasicBlock *BB : BF->layout())
+    Size += BBSizes[BB];
+
+  return Size;
+}
+
+static uint64_t getBFSize(BinaryFunction *BF) {
+  uint64_t Size = getBFCodeSize(BF);
+  Size += BF->estimateConstantIslandSize();
+  return Size;
+}
+
+static uint64_t getBFInstrOffset(BinaryBasicBlock *BB, MCInst *Instr) {
+  uint64_t Offset = 0;
+  BinaryFunction *BF = BB->getFunction();
+  for (BinaryBasicBlock *BasicBlock : BF->layout()) {
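+    // Sum the estimated sizes of all blocks laid out before BB to get the
+    // block's offset within the function.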
+    if (BB == BasicBlock)
+      break;
+
+    Offset += BBSizes[BasicBlock];
+  }
+
+  BinaryContext &BC = BF->getBinaryContext();
+  for (MCInst &II : *BB) {
+    if (Instr == &II)
+      return Offset;
+
+    Offset += BC.computeInstructionSize(II);
+  }
+
+  llvm_unreachable("Wrong BB or Instr");
+  exit(1);
+}
+
+inline static uint64_t getNewTextStart(BinaryContext &BC) {
+  // NOTE The new text address is allocated after all passes have finished;
+  // for now use the first free address stored in BC.LayoutStartAddress
+  return alignTo(BC.LayoutStartAddress, BC.PageAlign);
+}
+
+BinaryFunction *getBF(BinaryContext &BC, std::vector<BinaryFunction *> &BFs,
+                      const char *Name) {
+  for (auto BFit = BFs.rbegin(); BFit != BFs.rend(); ++BFit) {
+    BinaryFunction *BF = *BFit;
+    if (BF->hasRestoredNameRegex(Name))
+      return BF;
+  }
+
+  return nullptr;
+}
+
+BinaryFunction *getFirstBF(BinaryContext &BC,
+                           std::vector<BinaryFunction *> &BFs) {
+  return getBF(BC, BFs, GolangPass::getFirstBFName());
+}
+
+BinaryFunction *getLastBF(BinaryContext &BC,
+                          std::vector<BinaryFunction *> &BFs) {
+  return getBF(BC, BFs, GolangPass::getLastBFName());
+}
+
+uint64_t readEndianVal(DataExtractor &DE, uint64_t *Offset, uint16_t Size);
+
+uint32_t readVarint(DataExtractor &DE, uint64_t *Offset);
+
+int32_t readVarintPair(DataExtractor &DE, uint64_t *MapOffset, int32_t &ValSum,
+                       uint64_t &OffsetSum, const uint8_t Quantum);
+
+static void writeVarint(uint8_t *Data, uint64_t *Offset, uint32_t Val) {
+  while (Val >= 0x80) {
+    Data[(*Offset)++] = (uint8_t)(Val | 0x80);
+    Val >>= 7;
+  }
+
+  Data[(*Offset)++] = (uint8_t)Val;
+}
+
+static void writeVarint(uint8_t **Data, uint32_t Val) {
+  uint64_t Offset = 0;
+  writeVarint(*Data, &Offset, Val);
+  *Data += Offset;
+}
+
+static void writeVarintPair(int32_t Val, int32_t &PrevVal, uint64_t Offset,
+                            uint64_t &CurrentOffset, bool &IsFirst,
+                            uint8_t **DataFuncOffset, const uint8_t Quantum) {
+  int32_t V = Val - PrevVal;
+  V = (V < 0) ? (((-V - 1) << 1) | 1) : V << 1;
+  assert((V != 0 || IsFirst) && "The value delta cannot be zero");
+  PrevVal = Val;
+  writeVarint(DataFuncOffset, (uint32_t)V);
+  assert((Offset - CurrentOffset) % Quantum == 0 &&
+         "Offset is not a multiple of quantum");
+  uint32_t CurrentDelta = (Offset - CurrentOffset) / Quantum;
+  assert((CurrentDelta || IsFirst) && "The offset delta cannot be zero");
+  writeVarint(DataFuncOffset, CurrentDelta);
+  CurrentOffset = Offset;
+  IsFirst = false;
+}
+
+template <typename T> static T writeEndian(BinaryContext &BC, T Val) {
+  T Ret;
+  SmallVector<char, 8> Tmp;
+  raw_svector_ostream OS(Tmp);
+  enum support::endianness Endian = support::big;
+  if (BC.AsmInfo->isLittleEndian())
+    Endian = support::little;
+
+  struct support::endian::Writer Writer(OS, Endian);
+  Writer.write(Val);
+  memcpy(&Ret, OS.str().data(), sizeof(T));
+  return Ret;
+}
+
+void writeEndianVal(BinaryContext &BC, uint64_t Val, uint16_t Size,
+                    uint8_t **Res) {
+  switch (Size) {
+  case 8: {
+    uint64_t Endian = writeEndian(BC, Val);
+    **(uint64_t **)Res = Endian;
+    break;
+  }
+
+  case 4: {
+    uint32_t Endian = writeEndian(BC, (uint32_t)Val);
+    **(uint32_t **)Res = Endian;
+    break;
+  }
+
+  case 2: {
+    uint16_t Endian = writeEndian(BC, (uint16_t)Val);
+    **(uint16_t **)Res = Endian;
+    break;
+  }
+
+  case 1: {
+    **Res = (uint8_t)Val;
+    break;
+  }
+
+  default:
+    llvm_unreachable("Wrong type size");
+    exit(1);
+  }
+
+  *Res += Size;
+}
+
+inline void writeEndianPointer(BinaryContext &BC, uint64_t Val, uint8_t **Res) {
+  return writeEndianVal(BC, Val, BC.AsmInfo->getCodePointerSize(), Res);
+}
+
+std::string getVarintName(uint32_t Index, bool IsNext);
+
+void addVarintAnnotation(BinaryContext &BC, MCInst &II, uint32_t Index,
+                         int32_t Value, bool IsNext, unsigned AllocId);
+
+bool hasVarintAnnotation(BinaryContext &BC, MCInst &II, uint32_t Index,
+                         bool IsNext);
+
+int32_t getVarintAnnotation(BinaryContext &BC, MCInst &II, uint32_t Index,
+                            bool IsNext);
+
+std::string getFuncdataName(uint32_t Findex, uint32_t Size);
+
+std::string getFuncdataSizeName(uint32_t Findex);
+
+void addFuncdataAnnotation(BinaryContext &BC, MCInst &II, uint32_t Findex,
+                           int32_t Value, unsigned AllocId);
+
+bool hasFuncdataAnnotation(BinaryContext &BC, MCInst &II, uint32_t Findex);
+
+uint32_t getFuncdataSizeAnnotation(BinaryContext &BC, MCInst &II,
+                                   uint32_t Findex);
+
+int32_t getFuncdataAnnotation(BinaryContext &BC, MCInst &II, uint32_t Findex,
+                              uint32_t Index);
+
+void RemoveRelaReloc(BinaryContext &BC, BinarySection *Section,
+                     uint64_t Offset);
+
+void AddRelaReloc(BinaryContext &BC, MCSymbol *Symbol, BinarySection *Section,
+                  uint64_t Offset, uint64_t Addend = 0);
+
+static std::vector<BinaryFunction *>
+getSortedGolangFunctions(BinaryContext &BC) {
+  std::vector<BinaryFunction *> BFs = BC.getSortedFunctions();
+  BFs.erase(std::remove_if(BFs.begin(), BFs.end(),
+                           [](BinaryFunction *BF) {
+                             return !BF->isGolang() || BF->isFolded();
+                           }),
+            BFs.end());
+  return BFs;
+}
+
+Pclntab::~Pclntab() {}
+
+int Pclntab::readHeader(BinaryContext &BC, const uint64_t PclntabHeaderAddr) {
+  BinaryData *PclntabSym = BC.getBinaryDataAtAddress(PclntabHeaderAddr);
+  if (!PclntabSym) {
+    errs() << "BOLT-ERROR: Failed to get pclntab symbol!\n";
+    return -1;
+  }
+
+  BinarySection *Section = &PclntabSym->getSection();
+  uint64_t Offset = PclntabHeaderAddr - Section->getAddress();
+  setPclntabHeaderOffset(Offset);
+  DataExtractor DE =
+      DataExtractor(Section->getContents(), BC.AsmInfo->isLittleEndian(),
+                    BC.AsmInfo->getCodePointerSize());
+
+  __readHeader(BC, DE);
+
+  if (!checkMagic()) {
+    errs() << "BOLT-ERROR: Pclntab bad magic!\n";
+    return -1;
+  }
+
+  if (getPsize() != BC.AsmInfo->getCodePointerSize()) {
+    errs() << "BOLT-ERROR: Pclntab bad pointer size!\n";
+    return -1;
+  }
+
+  return 0;
+}
+
+int Pclntab::writeHeader(BinaryContext &BC, uint8_t *Pclntab) {
+  setNewHeaderOffsets();
+  __writeHeader(BC, Pclntab);
+  return 0;
+}
+
+Module::~Module() {}
+
+int Module::read(BinaryContext &BC) {
+  // NOTE local.moduledata is used in plugins. The firstmoduledata symbol
+  // could still be found there, but it will point into the BSS section.
+  BinaryData *Module = BC.getFirstBinaryDataByName("local.moduledata");
+  if (!Module)
+    Module = BC.getFirstBinaryDataByName("runtime.firstmoduledata");
+
+  if (!Module) {
+    errs() << "BOLT-ERROR: Failed to get firstmoduledata symbol!\n";
+    return -1;
+  }
+
+  BinarySection *Section = &Module->getSection();
+  DataExtractor DE =
+      DataExtractor(Section->getContents(), BC.AsmInfo->isLittleEndian(),
+                    BC.AsmInfo->getCodePointerSize());
+
+  uint64_t Offset = Module->getAddress() - Section->getAddress();
+  uint64_t *ModuleArr = getModule();
+  for (size_t I = 0; I < getModuleSize() / sizeof(uint64_t); ++I) {
+    assert(DE.isValidOffset(Offset) && "Invalid offset");
+    ModuleArr[I] = DE.getAddress(&Offset);
+  }
+
+  return 0;
+}
+
+GoFunc::~GoFunc() {}
+
+std::unique_ptr<Module> GolangPass::createGoModule() {
+  if (opts::GolangPass == opts::GV_1_17_5)
+    return std::make_unique<Module_v1_17_5>();
+  else if (opts::GolangPass == opts::GV_1_17_2)
+    return std::make_unique<Module_v1_17_2>();
+  else if (opts::GolangPass == opts::GV_1_16_5)
+    return std::make_unique<Module_v1_16_5>();
+  else if (opts::GolangPass == opts::GV_1_14_12)
+    return std::make_unique<Module_v1_14_12>();
+  else if (opts::GolangPass == opts::GV_1_14_9)
+    return std::make_unique<Module_v1_14_9>();
+
+  llvm_unreachable("Wrong golang version");
+  exit(1);
+}
+
+std::unique_ptr<Pclntab> GolangPass::createGoPclntab() {
+  if (opts::GolangPass == opts::GV_1_17_5)
+    return std::make_unique<Pclntab_v1_17_5>();
+  else if (opts::GolangPass == opts::GV_1_17_2)
+    return std::make_unique<Pclntab_v1_17_2>();
+  else if (opts::GolangPass == opts::GV_1_16_5)
+    return std::make_unique<Pclntab_v1_16_5>();
+  else if (opts::GolangPass == opts::GV_1_14_12)
+    return std::make_unique<Pclntab_v1_14_12>();
+  else if (opts::GolangPass == opts::GV_1_14_9)
+    return std::make_unique<Pclntab_v1_14_9>();
+
+  llvm_unreachable("Wrong golang version");
+  exit(1);
+}
+
+std::unique_ptr<GoFunc> GolangPass::createGoFunc() {
+  if (opts::GolangPass == opts::GV_1_17_5)
+    return std::make_unique<GoFunc_v1_17_5>();
+  else if (opts::GolangPass == opts::GV_1_17_2)
+    return std::make_unique<GoFunc_v1_17_2>();
+  else if (opts::GolangPass == opts::GV_1_16_5)
+    return std::make_unique<GoFunc_v1_16_5>();
+  else if (opts::GolangPass == opts::GV_1_14_12)
+    return std::make_unique<GoFunc_v1_14_12>();
+  else if (opts::GolangPass == opts::GV_1_14_9)
+    return std::make_unique<GoFunc_v1_14_9>();
+
+  llvm_unreachable("Wrong golang version");
+  exit(1);
+}
+
+struct StackVal {
+  uint32_t Size;
+  int32_t OldVal;
+  int32_t Val;
+};
+
+using InstBias = std::map<uint32_t, struct StackVal>;
+
+static uint32_t stackCounter(BinaryFunction *BF, BinaryBasicBlock *BB,
+                             InstBias &Map, uint32_t SpVal) {
+  BinaryContext &BC = BF->getBinaryContext();
+  unsigned pSize = BC.AsmInfo->getCodePointerSize();
+  auto addVal = [&](InstBias &Map, MCInst *II, int32_t OldVal, int32_t NewVal) {
+    struct StackVal StackVal;
+    uint32_t Offset = getBFInstrOffset(BB, II);
+    StackVal.Size = BC.computeInstructionSize(*II);
+    StackVal.OldVal = OldVal;
+    StackVal.Val = NewVal;
+    Map[Offset] = StackVal;
+  };
+
+  for (MCInst &II : *BB) {
+    int Ret = 0;
+    if (&II == &(*BB->begin()) || &II == &(*BB->rbegin()))
+      addVal(Map, &II, SpVal, SpVal);
+
+    // Ignore instrumentation stack usage
+    if (opts::Instrument && BC.MIB->hasAnnotation(II, "IsInstrumentation"))
+      continue;
+
+    if ((Ret = BC.MIB->getStackAdjustment(II))) {
+      // NOTE The only known exception seems to be the runtime.rt0_go function
+      if (std::abs(Ret) % pSize == 0) {
+        addVal(Map, &II, SpVal, SpVal + Ret);
+        SpVal += Ret;
+      }
+    }
+  }
+
+  return SpVal;
+}
+
+int GolangPass::typePass(BinaryContext &BC, uint64_t TypeAddr) {
+  static std::unordered_set<uint64_t> VisitedTypes;
+  uint64_t Offset;
+  uint64_t SectionAddr;
+
+  if (VisitedTypes.find(TypeAddr) != VisitedTypes.end())
+    return 0;
+
+  VisitedTypes.insert(TypeAddr);
+  ErrorOr<BinarySection &> Section = BC.getSectionForAddress(TypeAddr);
+  if (!Section) {
+    errs() << "BOLT-ERROR: Failed to get section for type 0x"
+           << Twine::utohexstr(TypeAddr) << "\n";
+    return -1;
+  }
+
+  SectionAddr = Section->getAddress();
+  DataExtractor DE = DataExtractor(Section->getContents(),
+                                   BC.AsmInfo->isLittleEndian(), getPsize());
+  Offset = TypeAddr - SectionAddr;
+  assert(DE.isValidOffset(Offset) && "Invalid offset");
+
+  // runtime/type.go
+  struct Type {
+    uint64_t Size;        // Pointer size
+    uint64_t Ptrdata;     // Pointer size
+    uint32_t Hash;
+    uint8_t Tflag;
+    uint8_t Align;
+    uint8_t Fieldalign;
+    uint8_t Kind;
+    uint64_t CompareFunc; // Pointer size
+    uint64_t Gcdata;      // Pointer size
+    int32_t NameOff;
+    int32_t PtrToThis;
+  } Type;
+
+  Type.Size = DE.getAddress(&Offset);
+  Type.Ptrdata = DE.getAddress(&Offset);
+  Type.Hash = DE.getU32(&Offset);
+  Type.Tflag = DE.getU8(&Offset);
+  Type.Align = DE.getU8(&Offset);
+  Type.Fieldalign = DE.getU8(&Offset);
+  Type.Kind = DE.getU8(&Offset);
+  Type.CompareFunc = DE.getAddress(&Offset);
+  Type.Gcdata = DE.getAddress(&Offset);
+  Type.NameOff = (int32_t)DE.getU32(&Offset);
+  Type.PtrToThis = (int32_t)DE.getU32(&Offset);
+
+  if (!(Type.Tflag & UNCOMMON_FLAG))
+    return 0;
+
+  uint8_t Kind = Type.Kind & KINDMASK;
+  assert(Kind < LastKind && "Wrong kind type");
+  assert(DE.isValidOffset(Offset) && "Wrong offset");
+
+  // The further structures are in the runtime/type.go file
+  if ((Kind == Ptr) || (Kind == Slice)) {
+    // struct ptrtype {
+    //   //typ _type;
+    //   struct Type *elem;
+    // };
+    //
+    // struct slicetype {
+    //   //typ _type;
+    //   elem *_type;
+    // };
+
+    uint64_t Address = DE.getAddress(&Offset);
+    typePass(BC, Address);
+  } else if (Kind == Struct) {
+    // struct structtype {
+    //   //struct Type typ;
+    //   pkgPath name // bytes *byte;
+    //   fields []structfield;
+    // };
+
+    struct {
+      uint64_t Bytes;      // Pointer size
+      uint64_t Type;       // Pointer size
+      uint64_t OffsetAnon; // Pointer size
+    } Structfield;
+
+    Offset += getPsize(); // Skip Name
+    uint64_t StructfieldAddress = DE.getAddress(&Offset);
+    uint64_t Size = DE.getAddress(&Offset);
+    Offset += getPsize(); // Skip second size
+
+    assert(Section->containsAddress(StructfieldAddress) &&
+           "Wrong StructfieldAddress");
+    uint64_t StructfieldOffset = StructfieldAddress - Section->getAddress();
+    while (Size--) {
+      Structfield.Bytes = DE.getAddress(&StructfieldOffset);
+      Structfield.Type = DE.getAddress(&StructfieldOffset);
+      Structfield.OffsetAnon = DE.getAddress(&StructfieldOffset);
+      typePass(BC, Structfield.Type);
+    }
+  } else if (Kind == Interface) {
+    // struct interfacetype {
+    //   //struct Type typ;
+    //   pkgPath name // bytes *byte;
+    //   mhdr []Imethod;
+    // };
+
+    struct {
+      int32_t Name;
+      int32_t Ityp;
+    } Imethod;
+
+    Offset += getPsize(); // Skip Name
+    uint64_t MhdrAddress = DE.getAddress(&Offset);
+    uint64_t Size = DE.getAddress(&Offset);
+    Offset += getPsize(); // Skip second size
+
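+    // mhdr is a Go slice header laid out as (data, len, cap); the cap word
+    // is the "second size" skipped above.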
+    assert(Section->containsAddress(MhdrAddress) && "Wrong MhdrAddress");
+    uint64_t MhdrOffset = MhdrAddress - Section->getAddress();
+    while (Size--) {
+      Imethod.Name = (int32_t)DE.getU32(&MhdrOffset);
+      Imethod.Ityp = (int32_t)DE.getU32(&MhdrOffset);
+      typePass(BC, FirstModule->getTypes() + Imethod.Ityp);
+    }
+  } else if (Kind == Func) {
+    // struct functype {
+    //   //typ _type;
+    //   inCount uint16;
+    //   outCount uint16;
+    // };
+
+    Offset += 2 * sizeof(uint16_t);
+    // NOTE In this case we must align the offset
+    Offset = alignTo(Offset, getPsize());
+  } else if (Kind == Array) {
+    // struct arraytype {
+    //   //typ _type;
+    //   elem *_type;
+    //   slice *_type;
+    //   len uintptr;
+    // };
+
+    uint64_t Addr;
+    Addr = DE.getAddress(&Offset);
+    typePass(BC, Addr);
+    Addr = DE.getAddress(&Offset);
+    typePass(BC, Addr);
+    Offset += getPsize();
+  } else if (Kind == Chan) {
+    // struct chantype {
+    //   //typ _type;
+    //   elem *_type;
+    //   dir uintptr;
+    // };
+
+    uint64_t Addr = DE.getAddress(&Offset);
+    typePass(BC, Addr);
+    Offset += getPsize();
+  } else if (Kind == Map) {
+    // Large structure; no alignment seems to be needed, though.
+    // The first 3 fields are *_type pointers
+    uint64_t Addr;
+    Addr = DE.getAddress(&Offset);
+    typePass(BC, Addr);
+    Addr = DE.getAddress(&Offset);
+    typePass(BC, Addr);
+    Addr = DE.getAddress(&Offset);
+    typePass(BC, Addr);
+    Offset += 2 * getPsize();
+  }
+
+  assert(Offset == alignTo(Offset, getPsize()) && "Wrong alignment");
+  assert(DE.isValidOffset(Offset) && "Invalid Offset");
+
+  uint64_t UncommonOffset = Offset;
+
+  // runtime/type.go
+  struct {
+    int32_t Pkgpath;
+    uint16_t Mcount;
+    uint16_t Xcount;
+    uint32_t Moff;
+    uint32_t Unused2;
+  } Uncommontype;
+
+  Uncommontype.Pkgpath = (int32_t)DE.getU32(&Offset);
+  Uncommontype.Mcount = DE.getU16(&Offset);
+  Uncommontype.Xcount = DE.getU16(&Offset);
+  Uncommontype.Moff = DE.getU32(&Offset);
+  Uncommontype.Unused2 = DE.getU32(&Offset);
+
+  assert(UncommonOffset + Uncommontype.Moff >= Offset && "Wrong Moff");
+  Offset = UncommonOffset + Uncommontype.Moff;
+  while (Uncommontype.Mcount--) {
+    assert(DE.isValidOffset(Offset) && "Invalid offset");
+    // runtime/type.go
+    struct {
+      int32_t NameOff;
+      int32_t TypeOff;
+      int32_t Ifn;
+      int32_t Tfn;
+    } Method;
+
+    Method.NameOff = (int32_t)DE.getU32(&Offset);
+    Method.TypeOff = (int32_t)DE.getU32(&Offset);
+    uint32_t IfnOffset = Offset;
+    Method.Ifn = (int32_t)DE.getU32(&Offset);
+    uint32_t TfnOffset = Offset;
+    Method.Tfn = (int32_t)DE.getU32(&Offset);
+
+    uint64_t StartAddr = getNewTextStart(BC);
+    uint64_t RType = Relocation::getAbs(sizeof(uint32_t));
+    auto setFn = [&](int32_t Value, uint32_t Offset) {
+      if (Value == -1)
+        return;
+
+      BinaryFunction *Fn = BC.getBinaryFunctionAtAddress(RuntimeText + Value);
+      if (!Fn) {
+        errs() << "BOLT-ERROR: Failed to get Ifn or Tfn!\n";
+        exit(1);
+      }
+
+      BC.addRelocation(SectionAddr + Offset, Fn->getSymbol(), RType,
+                       -StartAddr);
+    };
+
+    setFn(Method.Ifn, IfnOffset);
+    setFn(Method.Tfn, TfnOffset);
+  }
+
+  return 0;
+}
+
+int GolangPass::typelinksPass(BinaryContext &BC) {
+  int Ret;
+  uint64_t Types = FirstModule->getTypes();
+  if (!Types) {
+    errs() << "BOLT-ERROR: Types address is zero!\n";
+    return -1;
+  }
+
+  uint64_t Etypes = FirstModule->getEtypes();
+  assert(Types < Etypes && "Wrong Etypes");
+  const GoArray &TypeLinks = FirstModule->getTypelinks();
+  uint64_t TypelinkAddr = TypeLinks.getAddress();
+  uint64_t TypelinkCount = TypeLinks.getCount();
+  if (!TypelinkAddr) {
+    errs() << "BOLT-ERROR: Typelink address is zero!\n";
+    return -1;
+  }
+
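+  // typelinks is an array of int32 offsets from the types base address;
+  // each entry below resolves to types + offset (cf. runtime/symtab.go).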
+ ErrorOr Section = BC.getSectionForAddress(TypelinkAddr); + if (!Section) { + errs() << "BOLT-WARNING: Failed to get typelink section!\n"; + return -1; + } + + DataExtractor DE = DataExtractor(Section->getContents(), + BC.AsmInfo->isLittleEndian(), getPsize()); + + uint64_t Offset = TypelinkAddr - Section->getAddress(); + while (TypelinkCount--) { + assert(DE.isValidOffset(Offset) && "Invalid offset"); + uint64_t Type = Types + DE.getU32(&Offset); + assert(Type < Etypes && "Wrong type offset"); + Ret = typePass(BC, Type); + if (Ret < 0) + return Ret; + } + + return 0; +} + +int GolangPass::textsectmapPass(BinaryContext &BC) { + uint64_t TextSectAddr = FirstModule->getTextsectmap().getAddress(); + if (!TextSectAddr) { + // Plugins does't have this structure + return 0; + } + + ErrorOr Section = BC.getSectionForAddress(TextSectAddr); + if (!Section) { + errs() << "BOLT-ERROR: Failed to get textsectmaps section!\n"; + return -1; + } + + BinaryData *EtextSymbol = BC.getFirstBinaryDataByName(getLastBFName()); + if (!EtextSymbol) { + errs() << "BOLT-ERROR: Failed to get etext symbol!\n"; + return -1; + } + + // We will need to fix length field (text size) of textsect structure + // + // //runtime/symtab.go + // struct textsect { + // uint64_t vaddr; // Pointer size + // uint64_t length; // Pointer size + // uint64_t baseaddr; // Pointer size + // }; + + uint32_t Offset = TextSectAddr + getPsize(); // length field + uint64_t RType = Relocation::getAbs(getPsize()); + uint64_t Addend = -getNewTextStart(BC); + BC.addRelocation(Offset, EtextSymbol->getSymbol(), RType, Addend); + return 0; +} + +int GolangPass::pcspPass(BinaryFunction *BF, uint8_t **SectionData, + const uint32_t Index, uint8_t Quantum, + bool ForcePreserve) { + struct BBVal { + BinaryBasicBlock *BB; + uint32_t Val; + bool IsUncond; + } __attribute__((packed)) BBVal = {}; + + std::queue Queue; + std::unordered_map BBList; + InstBias InstBias; + + if (BF->isAsm() || opts::GolangPcspPreserve || ForcePreserve) + return writeVarintPass(BF, SectionData, Index, Quantum); + + if (BF->layout_begin() == BF->layout_end()) + return 0; + + BBVal.BB = *BF->layout_begin(); + Queue.push(BBVal); + while (!Queue.empty()) { + BinaryBasicBlock *BB = Queue.front().BB; + uint32_t SpVal = Queue.front().Val; + bool IsUncond = Queue.front().IsUncond; + Queue.pop(); + + // We are interested to find condition branching to BB + // since uncondition one might be fallthrough + auto Search = BBList.find(BB); + if (Search != BBList.end() && + (/*not uncond*/ BBList[BB] == false || IsUncond)) + continue; + + BBList[BB] = IsUncond; + BBVal.Val = stackCounter(BF, BB, InstBias, SpVal); + for (BinaryBasicBlock *BBS : BB->successors()) { + BBVal.BB = BBS; + // We use getIndex() here to ensure that originally BBS was right + // after BB, so if BB has only one successor no jmp instruction was in BB, + // so potentially the BB could have noreturn call + BBVal.IsUncond = IsUncond || !!(BB->succ_size() == 1 && + BBS->getIndex() == BB->getIndex() + 1); + Queue.push(BBVal); + } + } + + uint64_t Offset, CurrentOffset = 0; + int32_t PrevVal = -1, CurrentVal = 0; + bool IsFirst = true; + if (InstBias.empty()) + goto out; + + for (auto &I : InstBias) { + if (I.second.Val == CurrentVal) + continue; + + Offset = I.first; + // If the condition was not met it means that the CurrentVal + // belongs to the last instruction of the prev BB + if (CurrentVal == I.second.OldVal) + Offset += I.second.Size; + + writeVarintPair(CurrentVal, PrevVal, Offset, CurrentOffset, IsFirst, + SectionData, 
+    CurrentVal = I.second.Val;
+  }
+
+  // Add the last value
+  Offset = InstBias.rbegin()->first + InstBias.rbegin()->second.Size;
+  if (CurrentOffset < Offset) {
+    writeVarintPair(CurrentVal, PrevVal, Offset, CurrentOffset, IsFirst,
+                    SectionData, Quantum);
+  }
+
+out:;
+  **SectionData = 0;
+  (*SectionData)++;
+  return 0;
+}
+
+uint32_t GolangPass::deferreturnPass(BinaryContext &BC,
+                                     BinaryFunction *Function) {
+  for (auto BB : Function->rlayout()) {
+    for (auto II = BB->begin(); II != BB->end(); ++II) {
+      if (!BC.MIB->hasAnnotation(*II, "IsDefer"))
+        continue;
+
+      return getBFInstrOffset(BB, &(*II));
+    }
+  }
+
+  errs() << "Deferreturn call was not found for " << *Function << "\n";
+  exit(1);
+}
+
+int GolangPass::getNextMCinstVal(BinaryFunction::order_iterator BBIt,
+                                 uint64_t I, const uint32_t Index,
+                                 int32_t &Val, uint64_t *NextOffset) {
+  BinaryFunction *BF = (*BBIt)->getFunction();
+  BinaryContext &BC = BF->getBinaryContext();
+  // We are iterating to find the value for the next instruction
+  auto II = std::next((*BBIt)->begin(), I + 1);
+  do {
+    if (II == (*BBIt)->end()) {
+      BBIt = std::next(BBIt);
+      if (BBIt == BF->layout_end()) {
+        // Last instruction
+        return -1;
+      }
+
+      II = (*BBIt)->begin();
+    }
+
+    while (II != (*BBIt)->end() && !hasVarintAnnotation(BC, *II, Index)) {
+      if (NextOffset)
+        *NextOffset += BC.computeInstructionSize(*II);
+      II = std::next(II);
+    }
+
+  } while (II == (*BBIt)->end());
+
+  Val = getVarintAnnotation(BC, *II, Index);
+  return 0;
+}
+
+int GolangPass::writeVarintPass(BinaryFunction *BF, uint8_t **DataFuncOffset,
+                                const uint32_t Index, const uint8_t Quantum) {
+  int Ret;
+  uint64_t CurrentOffset = 0, Offset = 0;
+  int32_t PrevVal = -1, Val;
+  bool IsFirst = true;
+  size_t Size = getBFCodeSize(BF);
+  BinaryContext &BC = BF->getBinaryContext();
+  for (auto BBIt = BF->layout_begin(); BBIt != BF->layout_end(); ++BBIt) {
+    BinaryBasicBlock *BB = *BBIt;
+    for (uint64_t I = 0; I < BB->size(); ++I) {
+      MCInst &Inst = BB->getInstructionAtIndex(I);
+      Offset += BC.computeInstructionSize(Inst);
+      if (Offset < CurrentOffset)
+        continue;
+
+      if (!hasVarintAnnotation(BC, Inst, Index)) {
+        if (Offset == Size && IsFirst)
+          return -1;
+
+        continue;
+      }
+
+      Val = getVarintAnnotation(BC, Inst, Index);
+
+      int32_t NextInstVal;
+      uint64_t NextOffset = Offset;
+      Ret = getNextMCinstVal(BBIt, I, Index, NextInstVal, &NextOffset);
+      if (Ret < 0) {
+        Offset = NextOffset;
+        goto bf_done;
+      }
+
+      if (Val != NextInstVal)
+        writeVarintPair(Val, PrevVal, NextOffset, CurrentOffset, IsFirst,
+                        DataFuncOffset, Quantum);
+    }
+  }
+
+bf_done:;
+  // Create entry for the last instruction
+  writeVarintPair(Val, PrevVal, Offset, CurrentOffset, IsFirst, DataFuncOffset,
+                  Quantum);
+  **DataFuncOffset = 0;
+  (*DataFuncOffset)++;
+  return 0;
+}
+
+static void inlTreePass(BinaryFunction *BF, struct GoFunc *GoFunc,
+                        const char *OldPclntabNames, uint8_t **DataFuncOffset,
+                        const uint8_t *const SectionData) {
+  BinaryContext &BC = BF->getBinaryContext();
+  const unsigned InlIndex = GoFunc->getFuncdataInlTreeIndex();
+  const uint64_t Address = GoFunc->getFuncdata(InlIndex);
+  if (!Address)
+    return;
+
+  static std::unordered_map<size_t, uint64_t> InlHash; // String hash, offset
+  ErrorOr<BinarySection &> FuncdataSection = BC.getSectionForAddress(Address);
+  if (!FuncdataSection) {
+    errs() << "BOLT-ERROR: Failed to get section for inline 0x"
+           << Twine::utohexstr(Address) << "\n";
+    return;
+  }
+
+  DataExtractor DE = DataExtractor(FuncdataSection->getContents(),
+                                   BC.AsmInfo->isLittleEndian(),
+                                   BC.AsmInfo->getCodePointerSize());
+
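+  // Each record read in the loop below is one fixed-size InlinedCall entry:
+  // int16 parent, uint8 funcID, uint8 unused, int32 file, int32 line,
+  // int32 func, int32 parentPc -- 20 bytes in total, which is the stride
+  // that I * sizeof(InlinedCall) steps over.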
+  std::unordered_map<int32_t, uint32_t> ParentOffset; // Val:newOffset
+  uint32_t MaxInlCount = 0;
+  for (BinaryBasicBlock *BB : BF->layout()) {
+    for (MCInst &II : *BB) {
+      if (!hasFuncdataAnnotation(BC, II, InlIndex))
+        continue;
+
+      uint32_t Size = getFuncdataSizeAnnotation(BC, II, InlIndex);
+      MaxInlCount += Size;
+      for (uint32_t I = 0; I < Size; ++I) {
+        int32_t Index = getFuncdataAnnotation(BC, II, InlIndex, I);
+        ParentOffset[Index] = getBFInstrOffset(BB, &II);
+      }
+    }
+  }
+
+  for (uint32_t I = 0; I < MaxInlCount; ++I) {
+    uint64_t Offset = Address - FuncdataSection->getAddress();
+    Offset += I * sizeof(InlinedCall);
+
+    struct InlinedCall InlinedCall;
+    uint64_t ReadOffset = Offset;
+    assert(DE.isValidOffset(ReadOffset) && "Invalid offset");
+    InlinedCall.Parent = (int16_t)DE.getU16(&ReadOffset);
+    InlinedCall.FuncID = DE.getU8(&ReadOffset);
+    InlinedCall.Unused = DE.getU8(&ReadOffset);
+    InlinedCall.File = DE.getU32(&ReadOffset);
+    InlinedCall.Line = DE.getU32(&ReadOffset);
+    InlinedCall.Func = DE.getU32(&ReadOffset);
+    InlinedCall.ParentPc = DE.getU32(&ReadOffset);
+
+    // Copy inline function name if it was not copied already
+    const char *Name = OldPclntabNames + InlinedCall.Func;
+    std::hash<std::string> hasher;
+    auto Hash = hasher(std::string(Name));
+    if (InlHash.find(Hash) == InlHash.end()) {
+      InlHash[Hash] = *DataFuncOffset - SectionData;
+      size_t NameLen = strlen(Name) + 1;
+      memcpy(*DataFuncOffset, Name, NameLen);
+      *DataFuncOffset += NameLen;
+    }
+
+    // Use addend as relocation value
+    MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0);
+
+    // Zero out file and line fields
+    FuncdataSection->addRelocation(Offset + offsetof(struct InlinedCall, File),
+                                   ZeroSym,
+                                   Relocation::getAbs(sizeof(uint32_t)));
+    FuncdataSection->addRelocation(Offset + offsetof(struct InlinedCall, Line),
+                                   ZeroSym,
+                                   Relocation::getAbs(sizeof(uint32_t)));
+
+    // Create relocation for inline function name
+    FuncdataSection->addRelocation(
+        Offset + offsetof(struct InlinedCall, Func), ZeroSym,
+        Relocation::getAbs(sizeof(uint32_t)), InlHash[Hash]);
+
+    // Create relocation for parentPc offset
+    FuncdataSection->addRelocation(
+        Offset + offsetof(struct InlinedCall, ParentPc), ZeroSym,
+        Relocation::getAbs(sizeof(uint32_t)), ParentOffset[I]);
+  }
+}
+
+int GolangPass::pclntabPass(BinaryContext &BC) {
+  int Ret;
+  const uint64_t PclntabAddr = getPcHeaderAddr();
+  if (!PclntabAddr) {
+    errs() << "BOLT-ERROR: Pclntab address is zero!\n";
+    return -1;
+  }
+
+  BinaryData *PclntabSym = BC.getBinaryDataAtAddress(PclntabAddr);
+  if (!PclntabSym) {
+    errs() << "BOLT-ERROR: Failed to get pclntab symbol!\n";
+    return -1;
+  }
+
+  BinarySection *Section = &PclntabSym->getSection();
+  const unsigned SectionFlags = BinarySection::getFlags(/*IsReadOnly=*/false,
+                                                        /*IsText=*/false,
+                                                        /*IsAllocatable=*/true);
+  uint64_t SectionSize = 0;
+  BinarySection *OutputSection =
+      &BC.registerExtraSection(".pclntab", ELF::SHT_PROGBITS, SectionFlags,
+                               nullptr, ~0ULL, sizeof(uint64_t));
+
+  // NOTE Currently we don't know how much data we will have in the pclntab
+  // section. We will reserve eight times more than the original size using
+  // mmap. Thanks to demand paging, unused pages won't actually be allocated.
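+  // For example, a 1 MiB input pclntab on a 4 KiB-page system reserves
+  // alignTo(0x100000, 0x1000) * 8 == 8 MiB of virtual address space;
+  // only the pages actually written below ever get physical backing.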
+  const uint64_t MmapSize =
+      alignTo(PclntabSym->getSize(), BC.RegularPageSize) * 8;
+  uint8_t *const SectionData =
+      (uint8_t *)mmap(nullptr, MmapSize, PROT_READ | PROT_WRITE,
+                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (SectionData == MAP_FAILED) {
+    errs() << "Failed to mmap new .pclntab section\n";
+    return -1;
+  }
+
+  DataExtractor DE = DataExtractor(Section->getContents(),
+                                   BC.AsmInfo->isLittleEndian(), getPsize());
+
+  static std::vector<BinaryFunction *> BFs = getSortedGolangFunctions(BC);
+  const uint8_t PclnEntrySize = getPsize() * 2;
+  const size_t BFCount = BFs.size();
+  // Reserve one entry for maxpc, written before FuncPart starts
+  const uint64_t FuncOffset = BFCount * PclnEntrySize + PclnEntrySize;
+  uint8_t *OffsetPart = SectionData + Pclntab->getPcHeaderSize();
+  uint8_t *FuncPart = OffsetPart + FuncOffset;
+
+  // NOTE The IsFirstName variable is a workaround for a bug in go1.16:
+  // https://go-review.googlesource.com/c/go/+/334789
+  bool IsFirstName = true;
+  for (BinaryFunction *BF : BFs) {
+    assert((uint64_t)(FuncPart - SectionData) < MmapSize && "Overflow error");
+
+    {
+      // Add Functab Offsets
+      uint64_t Delta = (uint64_t)(OffsetPart - SectionData);
+      AddRelaReloc(BC, BF->getSymbol(), OutputSection, Delta);
+      OffsetPart += getPsize();
+      writeEndianPointer(BC, FuncPart - SectionData, &OffsetPart);
+    }
+
+    const uint64_t FunctabOffset = BF->getGolangFunctabOffset();
+    uint64_t OldTabOffset = Pclntab->getFunctabOffset() + FunctabOffset;
+    std::unique_ptr<struct GoFunc> GoFunc = createGoFunc();
+
+    // Read func structure
+    GoFunc->read(BC, DE, nullptr, &OldTabOffset);
+
+    // Get target structure size
+    size_t FuncSize = GoFunc->getSize(BC);
+    uint8_t *DataFuncOffset = FuncPart + FuncSize;
+
+    // We are not interested in the metadata tables anymore
+    GoFunc->disableMetadata();
+
+    // Save space for npcdata
+    DataFuncOffset += GoFunc->getPcdataSize();
+
+    // Save aligned space for nfuncdata
+    DataFuncOffset =
+        SectionData + alignTo(DataFuncOffset - SectionData, getPsize());
+    DataFuncOffset += GoFunc->getNfuncdata() * getPsize();
+
+    // Save name
+    const char *OldPclntabNames =
+        (char *)Section->getData() + Pclntab->getNameOffset();
+    if (GoFunc->getNameOffset() || IsFirstName) {
+      IsFirstName = false;
+      const char *Name = OldPclntabNames + GoFunc->getNameOffset();
+      size_t NameLen = strlen(Name) + 1;
+      memcpy(DataFuncOffset, Name, NameLen);
+      GoFunc->setNameOffset(DataFuncOffset - SectionData);
+      DataFuncOffset += NameLen;
+    }
+
+    // Fix pcdata
+    auto setPcdata = [&](const uint32_t Index) {
+      GoFunc->setPcdata(Index, DataFuncOffset - SectionData);
+      if (writeVarintPass(BF, &DataFuncOffset, Index, Pclntab->getQuantum()) <
+          0) {
+        GoFunc->setPcdata(Index, 0);
+        return;
+      }
+    };
+
+    setPcdata(GoFunc->getPcdataUnsafePointIndex());
+    setPcdata(GoFunc->getPcdataStackMapIndex());
+    setPcdata(GoFunc->getPcdataInlTreeIndex());
+
+    // Fix npcdata
+    GoFunc->fixNpcdata();
+
+    // Fix funcdata inline
+    inlTreePass(BF, GoFunc.get(), OldPclntabNames, &DataFuncOffset,
+                SectionData);
+
+    // Fix deferreturn
+    if (GoFunc->getDeferreturnOffset())
+      GoFunc->setDeferreturnOffset(deferreturnPass(BC, BF));
+
+    // Fix pcsp
+    if (GoFunc->getPcspOffset()) {
+      // TODO don't preserve PCSP table for functions with deferreturn
+      bool ForcePreserve = GoFunc->getDeferreturnOffset();
+      GoFunc->setPcspOffset(DataFuncOffset - SectionData);
+      Ret = pcspPass(BF, &DataFuncOffset, GoFunc->getPcspIndex(),
+                     Pclntab->getQuantum(), ForcePreserve);
+      if (Ret < 0)
+        goto failed;
+    }
+
+    GoFunc->write(BF, &FuncPart, SectionData, OutputSection);
+
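+    // At this point the per-function record is complete: the fixed func
+    // header (FuncSize bytes) is followed by the pcdata offset table, the
+    // pointer-aligned funcdata slots, and then the variable-length name,
+    // pcdata and pcsp payloads that DataFuncOffset was advanced over above.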
+    FuncPart = SectionData + alignTo(DataFuncOffset - SectionData, getPsize());
+  }
+
+  SectionSize = FuncPart - SectionData;
+
+  {
+    // The last OffsetPart entry is maxpc and the offset to filetab
+    std::vector<BinaryFunction *> BFs = BC.getSortedFunctions();
+    BinaryFunction *LastBF = getLastBF(BC, BFs);
+    uint64_t Delta = (uint64_t)(OffsetPart - SectionData);
+    AddRelaReloc(BC, LastBF->getSymbol(), OutputSection, Delta);
+    OffsetPart += getPsize();
+    writeEndianPointer(BC, 0, &OffsetPart);
+  }
+
+  // Write fixed Pclntab structure
+  Pclntab->setFunctionsCount(BFCount);
+  Pclntab->writeHeader(BC, SectionData);
+
+  // Fix section size
+  FirstModule->setPclntabSize(SectionSize);
+
+  // Fix ftab size
+  FirstModule->setFtabSize(BFCount);
+
+  OutputSection->setOutputContents(SectionData, SectionSize);
+  OutputSection->setIsFinalized();
+
+  PclntabSym->setOutputSize(SectionSize);
+  PclntabSym->setOutputLocation(*OutputSection, 0);
+  return 0;
+
+failed:;
+  munmap(SectionData, MmapSize);
+  return Ret;
+}
+
+int GolangPass::findFuncTabPass(BinaryContext &BC) {
+  // const uint32_t minfunc = 16; // runtime/symtab.go: minimum function size
+  const uint32_t pcsubbucketssize = 256;
+  // const uint32_t pcbucketsize = pcsubbucketssize * minfunc; //
+  // runtime/symtab.go: size of bucket in the pc->func lookup table
+
+  uint64_t FindFuncTab = FirstModule->getFindfunctab();
+  if (!FindFuncTab) {
+    errs() << "BOLT-ERROR: Findfunctab is zero!\n";
+    return -1;
+  }
+
+  BinaryData *FindfunctabSym = BC.getBinaryDataAtAddress(FindFuncTab);
+  if (!FindfunctabSym) {
+    errs() << "BOLT-ERROR: Failed to get findfunctab symbol!\n";
+    return -1;
+  }
+
+  const unsigned SectionFlags = BinarySection::getFlags(/*IsReadOnly=*/true,
+                                                        /*IsText=*/false,
+                                                        /*IsAllocatable=*/true);
+  uint64_t SectionSize = 0;
+  BinarySection *OutputSection =
+      &BC.registerExtraSection(".findfunctab", ELF::SHT_PROGBITS, SectionFlags,
+                               nullptr, ~0ULL, sizeof(uint64_t));
+  // runtime/symtab.go
+  struct {
+    uint32_t Idx;
+    uint8_t Subbuckets[16];
+  } Findfuncbucket;
+
+  // NOTE Currently we don't know how much space the BFs occupy in the text
+  // section. We will reserve four times more than the original size using
+  // mmap. Thanks to demand paging, unused pages won't actually be allocated.
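+  // For example, a 64 KiB input findfunctab on a 4 KiB-page system reserves
+  // alignTo(0x10000, 0x1000) * 4 == 256 KiB; the x4 headroom covers index
+  // growth from BOLT-inserted code and extra alignment padding.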
+  const uint64_t MmapSize =
+      alignTo(FindfunctabSym->getSize(), BC.RegularPageSize) * 4;
+  uint8_t *const SectionData =
+      (uint8_t *)mmap(nullptr, MmapSize, PROT_READ | PROT_WRITE,
+                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (SectionData == MAP_FAILED) {
+    errs() << "Failed to mmap new .findfunctab section\n";
+    return -1;
+  }
+
+  uint32_t LastIdx = 0, SubIndex = 0, Index = 0;
+  uint64_t Offset = getNewTextStart(BC),
+           NextOffset = Offset; // NOTE For alignment calculation
+  uint8_t *Data = SectionData;
+  std::vector<BinaryFunction *> BFs = getSortedGolangFunctions(BC);
+  for (auto BFit = BFs.begin(); BFit != BFs.end(); ++BFit) {
+    assert((uint64_t)(Data - SectionData) < MmapSize && "Overflow error");
+    BinaryFunction *BF = *BFit;
+    uint64_t Size = getBFSize(BF);
+    Offset += Size;
+    auto BFNext = std::next(BFit, 1);
+    if (BFNext != BFs.end()) {
+      if (BC.HasRelocations)
+        Offset = alignTo(Offset, BinaryFunction::MinAlign);
+
+      unsigned Alignment, MaxAlignment;
+      std::tie(Alignment, MaxAlignment) =
+          BC.getBFAlignment(**BFNext, /*EmitColdPart*/ false);
+      uint64_t Pad = offsetToAlignment(Offset, llvm::Align(Alignment));
+      if (Pad <= MaxAlignment)
+        Offset += Pad;
+
+      // Cold part start, align section
+      if (BF->getIndex() < INVALID_BF_INDEX &&
+          (*BFNext)->getIndex() == INVALID_BF_INDEX)
+        Offset = alignTo(Offset, opts::AlignFunctions);
+    }
+
+    // Offset points to the next BF
+    // NextOffset points to the pcsubbucketssize aligned address somewhere
+    // in the current BF
+    if (Offset <= NextOffset) {
+      ++Index;
+      continue;
+    }
+
+    // We are interested in the part of the function starting from NextOffset
+    Size = Offset - NextOffset;
+    do {
+      if (SubIndex % sizeof(Findfuncbucket.Subbuckets) == 0) {
+        writeEndianVal(BC, Index, sizeof(Findfuncbucket.Idx), &Data);
+        LastIdx = Index;
+        SubIndex = 0;
+      }
+
+      *Data++ = Index - LastIdx;
+      ++SubIndex;
+      Size -= pcsubbucketssize;
+    } while ((int64_t)Size > 0);
+
+    NextOffset = alignTo(Offset, pcsubbucketssize);
+    ++Index;
+  }
+
+  SectionSize = Data - SectionData;
+  OutputSection->setOutputContents(SectionData, SectionSize);
+  FindfunctabSym->setOutputSize(SectionSize);
+  FindfunctabSym->setOutputLocation(*OutputSection, 0);
+
+  // NOTE To be able to emit the new data we need at least one relocation for
+  // OutputSection to be created. Since the first 4 bytes of the findfunctab
+  // are always 0, create a dummy 4-byte absolute relocation there
+  MCSymbol *ZeroSym = BC.registerNameAtAddress("Zero", 0, 0, 0);
+  OutputSection->addRelocation(0, ZeroSym,
+                               Relocation::getAbs(sizeof(uint32_t)));
+  OutputSection->setIsFinalized();
+  return 0;
+}
+
+int GolangPass::getSymbols(BinaryContext &BC) {
+  // The iface/eface ifn/tfn addresses are set relative to this symbol
+  BinaryData *TextSymbol = BC.getFirstBinaryDataByName(getFirstBFName());
+  if (!TextSymbol) {
+    outs() << "BOLT-WARNING: Failed to get text start symbol!\n";
+    return -1;
+  }
+
+  RuntimeText = TextSymbol->getAddress();
+  return 0;
+}
+
+int GolangPass::checkGoVersion(BinaryContext &BC) {
+  auto failed = [&](void) -> int {
+    outs() << "BOLT-WARNING: Could not identify Go version for input binary!\n";
+    if (opts::GolangPass != opts::GV_AUTO)
+      return 0;
+
+    outs() << "BOLT-ERROR: No compatible version found! Specify gc version "
Specify gc version " + "explicitly\n"; + return -1; + }; + + BinaryData *BuildVersion = + BC.getFirstBinaryDataByName("runtime.buildVersion"); + if (!BuildVersion) + return failed(); + + BinarySection *Section = &BuildVersion->getSection(); + DataExtractor DE = + DataExtractor(Section->getContents(), BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + + uint64_t GoVersionOffset = BuildVersion->getAddress() - Section->getAddress(); + uint64_t GoVersionAddr = DE.getAddress(&GoVersionOffset); + ErrorOr GoVersionSection = + BC.getSectionForAddress(GoVersionAddr); + if (!GoVersionSection) { + errs() + << "BOLT-ERROR: Failed to get binary section for go version string\n"; + return failed(); + } + + const char *BinaryVersion = GoVersionSection->getContents().data(); + BinaryVersion += (GoVersionAddr - GoVersionSection->getAddress()); + + const unsigned MaxVerLen = 9; + if (opts::GolangPass != opts::GV_AUTO) { + const char *ExpectedVersion = GolangStringVer[opts::GolangPass]; + if (memcmp(BinaryVersion, ExpectedVersion, strlen(ExpectedVersion))) { + // NOTE Limit expected version string to 9 chars + outs() << "BOLT-WARNING: The binary expected version is " + << ExpectedVersion + << " but found: " << std::string(BinaryVersion, MaxVerLen) << "\n"; + return 0; + } + } else { + for (int I = opts::GV_LATEST; I > opts::GV_FIRST; --I) { + if (!memcmp(BinaryVersion, GolangStringVer[I], + strlen(GolangStringVer[I]))) { + outs() << "BOLT-INFO: Golang version is: " << GolangStringVer[I] + << "\n"; + opts::GolangPass = (opts::GolangVersion)I; + return 0; + } + } + + outs() << "BOLT-INFO: The binary version is: " + << std::string(BinaryVersion, MaxVerLen) << "\n"; + return failed(); + } + + return 0; +} + +void GolangPass::runOnFunctions(BinaryContext &BC) { + int Ret; + +#define CALL_STAGE(func) \ + Ret = func(BC); \ + if (Ret < 0) { \ + errs() << "BOLT-ERROR: Golang " << #func << " stage failed!\n"; \ + exit(1); \ + } + + CALL_STAGE(updateBBSizes); + + CALL_STAGE(typelinksPass); + + CALL_STAGE(pclntabPass); + + CALL_STAGE(findFuncTabPass); + + CALL_STAGE(textsectmapPass); + + CALL_STAGE(FirstModule->patch); + +#undef CALL_STAGE +} + +} // end namespace bolt +} // end namespace llvm diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp --- a/bolt/lib/Passes/IdenticalCodeFolding.cpp +++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp @@ -12,6 +12,7 @@ #include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Core/ParallelUtilities.h" +#include "bolt/Utils/CommandLineOpts.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Timer.h" @@ -403,6 +404,25 @@ namespace llvm { namespace bolt { +bool IdenticalCodeFolding::shouldOptimizeICF(const BinaryFunction &BF) const { + if (BF.hasUnknownControlFlow()) + return false; + if (BF.isFolded()) + return false; + if (BF.hasSDTMarker()) + return false; + + if (opts::GolangPass != opts::GV_NONE) { + if (!BF.isGolang()) + return false; + + if (BF.getIndex() == GO_FIRST_BF_INDEX || BF.getIndex() == GO_LAST_BF_INDEX) + return false; + } + + return BinaryFunctionPass::shouldOptimize(BF); +} + void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); uint64_t NumFunctionsFolded = 0; @@ -444,7 +464,7 @@ "ICF breakdown", opts::TimeICF); for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &BF = BFI.second; - if (!this->shouldOptimize(BF)) + if (!shouldOptimizeICF(BF)) continue; 
      CongruentBuckets[&BF].emplace(&BF);
    }
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -107,12 +107,10 @@
   cl::Hidden,
   cl::cat(BoltOptCategory));
 
-static cl::opt<bool>
-NoInline("no-inline",
-  cl::desc("disable all inlining (overrides other inlining options)"),
-  cl::init(false),
-  cl::ZeroOrMore,
-  cl::cat(BoltOptCategory));
+cl::opt<bool> NoInline(
+    "no-inline",
+    cl::desc("disable all inlining (overrides other inlining options)"),
+    cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
 
 /// This function returns true if any of inlining options are specified and the
 /// inlining pass should be executed. Whenever a new inlining option is added,
diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp
--- a/bolt/lib/Passes/Instrumentation.cpp
+++ b/bolt/lib/Passes/Instrumentation.cpp
@@ -187,7 +187,9 @@
 BinaryBasicBlock::iterator insertInstructions(InstructionListType &Instrs,
                                               BinaryBasicBlock &BB,
                                               BinaryBasicBlock::iterator Iter) {
+  BinaryContext &BC = BB.getFunction()->getBinaryContext();
   for (MCInst &NewInst : Instrs) {
+    BC.MIB->getOrCreateAnnotationAs<bool>(NewInst, "IsInstrumentation") = true;
     Iter = BB.insertInstruction(Iter, NewInst);
     ++Iter;
   }
@@ -369,6 +371,14 @@
     }
   }
 
+  // If we suspect that the function was written in assembly, let's treat it
+  // as a leaf function, since its code might use the stack below the
+  // function frame and instrumenting it could wipe values saved on the stack
+  // TODO Track SP access by the function to determine such cases
+  if (Function.isAsm())
+    IsLeafFunction = true;
+
   for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
     BinaryBasicBlock &BB = *BBI;
     bool HasUnconditionalBranch = false;
@@ -512,6 +522,8 @@
   if (!BC.isX86())
     return;
 
+  BC.MIB->getOrCreateAnnotationIndex("IsInstrumentation");
+
   const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/false,
                                                  /*IsText=*/false,
                                                  /*IsAllocatable=*/true);
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -37,6 +37,13 @@
 namespace {
 constexpr unsigned ColdFragAlign = 16;
 
+inline uint64_t getInstructionSize(const BinaryContext &BC,
+                                   const MCInst &Inst) {
+  if (BC.isAArch64())
+    return 4;
+  return BC.computeInstructionSize(Inst);
+}
+
 void relaxStubToShortJmp(BinaryBasicBlock &StubBB, const MCSymbol *Tgt) {
   const BinaryContext &BC = StubBB.getFunction()->getBinaryContext();
   InstructionListType Seq;
@@ -67,6 +74,12 @@
 }
 
 bool shouldInsertStub(const BinaryContext &BC, const MCInst &Inst) {
+  // We are not interested in call relaxation for X86. It also has a problem
+  // with inserted functions, e.g. instrumentation indirect calls, since we
+  // did not update the layout for them
+  if (BC.isX86())
+    return BC.MIB->isBranch(Inst) && !BC.MIB->isIndirectBranch(Inst);
+
   return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) &&
          !BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst);
 }
@@ -144,9 +157,9 @@
   }
   int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
   uint64_t Mask = ~((1ULL << BitsAvail) - 1);
-  uint64_t PCRelTgtAddress = Cand->first;
-  PCRelTgtAddress = DotAddress > PCRelTgtAddress ? 
DotAddress - PCRelTgtAddress - : PCRelTgtAddress - DotAddress; + uint64_t InstSize = getInstructionSize(BC, Inst); + uint64_t PCRelTgtAddress = + getTargetOffset(BC, InstSize, Cand->first, DotAddress); LLVM_DEBUG({ if (Candidates.size() > 1) dbgs() << "Considering stub group with " << Candidates.size() @@ -276,7 +289,6 @@ } void LongJmpPass::tentativeBBLayout(const BinaryFunction &Func) { - const BinaryContext &BC = Func.getBinaryContext(); uint64_t HotDot = HotAddresses[&Func]; uint64_t ColdDot = ColdAddresses[&Func]; bool Cold = false; @@ -284,10 +296,10 @@ if (Cold || BB->isCold()) { Cold = true; BBAddresses[BB] = ColdDot; - ColdDot += BC.computeCodeSize(BB->begin(), BB->end()); + ColdDot += BB->estimateSize(); } else { BBAddresses[BB] = HotDot; - HotDot += BC.computeCodeSize(BB->begin(), BB->end()); + HotDot += BB->estimateSize(); } } } @@ -300,9 +312,11 @@ if (!Func->isSplit()) continue; DotAddress = alignTo(DotAddress, BinaryFunction::MinAlign); - uint64_t Pad = - offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment())); - if (Pad <= Func->getMaxColdAlignmentBytes()) + unsigned Alignment, MaxAlignment; + std::tie(Alignment, MaxAlignment) = + BC.getBFAlignment(*Func, /*EmitColdPart*/ true); + uint64_t Pad = offsetToAlignment(DotAddress, llvm::Align(Alignment)); + if (Pad <= MaxAlignment) DotAddress += Pad; ColdAddresses[Func] = DotAddress; LLVM_DEBUG(dbgs() << Func->getPrintName() << " cold tentative: " @@ -357,11 +371,12 @@ if (opts::HotFunctionsAtEnd) DotAddress = alignTo(DotAddress, opts::AlignText); } - DotAddress = alignTo(DotAddress, BinaryFunction::MinAlign); - uint64_t Pad = - offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment())); - if (Pad <= Func->getMaxAlignmentBytes()) + unsigned Alignment, MaxAlignment; + std::tie(Alignment, MaxAlignment) = + BC.getBFAlignment(*Func, /*EmitColdPart*/ false); + uint64_t Pad = offsetToAlignment(DotAddress, llvm::Align(Alignment)); + if (Pad <= MaxAlignment) DotAddress += Pad; HotAddresses[Func] = DotAddress; LLVM_DEBUG(dbgs() << Func->getPrintName() << " tentative: " @@ -455,6 +470,7 @@ bool LongJmpPass::relaxStub(BinaryBasicBlock &StubBB) { const BinaryFunction &Func = *StubBB.getFunction(); const BinaryContext &BC = Func.getBinaryContext(); + assert(BC.isAArch64() && "Unsupported arch"); const int Bits = StubBits[&StubBB]; // Already working with the largest range? if (Bits == static_cast(BC.AsmInfo->getCodePointerSize() * 8)) @@ -467,11 +483,11 @@ ~((1ULL << (RangeSingleInstr - 1)) - 1); const MCSymbol *RealTargetSym = BC.MIB->getTargetSymbol(*StubBB.begin()); - const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym); - uint64_t TgtAddress = getSymbolAddress(BC, RealTargetSym, TgtBB); uint64_t DotAddress = BBAddresses[&StubBB]; - uint64_t PCRelTgtAddress = DotAddress > TgtAddress ? 
DotAddress - TgtAddress - : TgtAddress - DotAddress; + const uint64_t InstSize = 4; + uint64_t PCRelTgtAddress = + getTargetOffset(Func, InstSize, RealTargetSym, DotAddress); + // If it fits in one instruction, do not relax if (!(PCRelTgtAddress & SingleInstrMask)) return false; @@ -505,44 +521,68 @@ return true; } -bool LongJmpPass::needsStub(const BinaryBasicBlock &BB, const MCInst &Inst, - uint64_t DotAddress) const { - const BinaryFunction &Func = *BB.getFunction(); - const BinaryContext &BC = Func.getBinaryContext(); - const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst); - assert(TgtSym && "getTargetSymbol failed"); +uint64_t LongJmpPass::getTargetOffset(const BinaryContext &BC, + uint64_t InstSize, uint64_t TargetAddress, + uint64_t DotAddress) const { + uint64_t PCRelTgtAddress; + PCRelTgtAddress = DotAddress > TargetAddress ? DotAddress - TargetAddress + : TargetAddress - DotAddress; + if (BC.isX86() && TargetAddress < DotAddress) { + // For x86 we need to take into account instruction size + PCRelTgtAddress += InstSize; + } + + return PCRelTgtAddress; +} +uint64_t LongJmpPass::getTargetOffset(const BinaryFunction &Func, + uint64_t InstSize, const MCSymbol *TgtSym, + uint64_t DotAddress) const { + const BinaryContext &BC = Func.getBinaryContext(); const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(TgtSym); // Check for shared stubs from foreign functions if (!TgtBB) { auto SSIter = SharedStubs.find(TgtSym); - if (SSIter != SharedStubs.end()) + if (SSIter != SharedStubs.end()) { TgtBB = SSIter->second; + } } - int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1; - uint64_t Mask = ~((1ULL << BitsAvail) - 1); + uint64_t Address = getSymbolAddress(BC, TgtSym, TgtBB); + return getTargetOffset(BC, InstSize, Address, DotAddress); +} - uint64_t PCRelTgtAddress = getSymbolAddress(BC, TgtSym, TgtBB); - PCRelTgtAddress = DotAddress > PCRelTgtAddress ? DotAddress - PCRelTgtAddress - : PCRelTgtAddress - DotAddress; +uint64_t LongJmpPass::getTargetOffset(const BinaryFunction &Func, + const MCInst &Inst, + uint64_t DotAddress) const { + const BinaryContext &BC = Func.getBinaryContext(); + const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst); + assert(TgtSym && "getTargetSymbol failed"); + uint64_t InstSize = getInstructionSize(BC, Inst); + return getTargetOffset(Func, InstSize, TgtSym, DotAddress); +} - return PCRelTgtAddress & Mask; +bool LongJmpPass::needsStub(const BinaryBasicBlock &BB, const MCInst &Inst, + uint64_t DotAddress) const { + const BinaryFunction &Func = *BB.getFunction(); + const BinaryContext &BC = Func.getBinaryContext(); + uint64_t PCRelTgtAddress = getTargetOffset(Func, Inst, DotAddress); + int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1; + uint64_t Mask = ~((1ULL << BitsAvail) - 1); + return !!(PCRelTgtAddress & Mask); } -bool LongJmpPass::relax(BinaryFunction &Func) { +LongJmpPass::RelaxRet LongJmpPass::relax(BinaryFunction &Func) { const BinaryContext &BC = Func.getBinaryContext(); - bool Modified = false; + RelaxRet Modified = RelaxRet::NotModified; - assert(BC.isAArch64() && "Unsupported arch"); - constexpr int InsnSize = 4; // AArch64 std::vector>> Insertions; BinaryBasicBlock *Frontier = getBBAtHotColdSplitPoint(Func); uint64_t FrontierAddress = Frontier ? 
BBAddresses[Frontier] : 0;
   if (FrontierAddress)
-    FrontierAddress += Frontier->getNumNonPseudos() * InsnSize;
+    FrontierAddress += Frontier->estimateSize();
 
   // Add necessary stubs for branch targets we know we can't fit in the
   // instruction
@@ -552,21 +592,33 @@
       if (Stubs[&Func].count(&BB))
         continue;
 
-      for (MCInst &Inst : BB) {
+      for (auto II = BB.begin(); II != BB.end(); ++II) {
+        MCInst &Inst = *II;
         if (BC.MIB->isPseudo(Inst))
           continue;
 
+        uint64_t InstSize = getInstructionSize(BC, Inst);
         if (!shouldInsertStub(BC, Inst)) {
-          DotAddress += InsnSize;
+          DotAddress += InstSize;
           continue;
         }
 
         // Check and relax direct branch or call
         if (!needsStub(BB, Inst, DotAddress)) {
-          DotAddress += InsnSize;
+          DotAddress += InstSize;
+          continue;
+        }
+
+        Modified = static_cast<RelaxRet>(
+            static_cast<uint32_t>(Modified) |
+            static_cast<uint32_t>(RelaxRet::InstrRelaxed));
+
+        // Try to relax the instruction in place; we will check whether it
+        // fits on the next round
+        if (BC.MIB->relaxInstruction(Inst)) {
+          DotAddress += getInstructionSize(BC, Inst);
           continue;
         }
 
-        Modified = true;
         // Insert stubs close to the patched BB if call, but far away from the
         // hot path if a branch, since this branch target is the cold region
@@ -578,16 +630,22 @@
           uint64_t Mask = ~((1ULL << BitsAvail) - 1);
           assert(FrontierAddress > DotAddress &&
                  "Hot code should be before the frontier");
-          uint64_t PCRelTgt = FrontierAddress - DotAddress;
+          uint64_t PCRelTgt =
+              getTargetOffset(BC, InstSize, FrontierAddress, DotAddress);
           if (!(PCRelTgt & Mask))
             InsertionPoint = Frontier;
         }
+
         // Always put stubs at the end of the function if non-simple. We can't
         // change the layout of non-simple functions because it has jump tables
         // that we do not control.
         if (!Func.isSimple())
           InsertionPoint = &*std::prev(Func.end());
 
+        Modified = static_cast<RelaxRet>(
+            static_cast<uint32_t>(Modified) |
+            static_cast<uint32_t>(RelaxRet::StubsInserted));
+
         // Create a stub to handle a far-away target
         Insertions.emplace_back(InsertionPoint,
                                 replaceTargetWithStub(BB, Inst, DotAddress,
@@ -595,7 +653,7 @@
                                                       ? FrontierAddress
                                                       : DotAddress));
 
-        DotAddress += InsnSize;
+        DotAddress += InstSize;
       }
     }
 
@@ -604,7 +662,10 @@
       if (!Stubs[&Func].count(&BB) || !BB.isValid())
         continue;
 
-      Modified |= relaxStub(BB);
+      if (relaxStub(BB))
+        Modified = static_cast<RelaxRet>(
+            static_cast<uint32_t>(Modified) |
+            static_cast<uint32_t>(RelaxRet::InstrRelaxed));
     }
 
   for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Elmt :
@@ -625,18 +686,19 @@
   bool Modified;
   uint32_t Iterations = 0;
   do {
-    ++Iterations;
     Modified = false;
+    ++Iterations;
     tentativeLayout(BC, Sorted);
     updateStubGroups();
     for (BinaryFunction *Func : Sorted) {
-      if (relax(*Func)) {
-        // Don't ruin non-simple functions, they can't afford to have the layout
-        // changed.
-        if (Func->isSimple())
-          Func->fixBranches();
+      RelaxRet Ret = relax(*Func);
+      if (Ret != RelaxRet::NotModified)
         Modified = true;
-      }
+      // Don't ruin non-simple functions; they can't afford to have their
+      // layout changed. Also, if no stubs were inserted, we don't have to run
+      // fixBranches, which could otherwise shorten relaxed instructions again.
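+      // RelaxRet is a set of flags: InstrRelaxed and StubsInserted may both
+      // be set for a single function, so test the StubsInserted bit rather
+      // than comparing the whole value for equality.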
+ if (static_cast(Ret) & RelaxRet::StubsInserted && Func->isSimple()) + Func->fixBranches(); } } while (Modified); outs() << "BOLT-INFO: Inserted " << NumHotStubs diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp --- a/bolt/lib/Passes/ReorderFunctions.cpp +++ b/bolt/lib/Passes/ReorderFunctions.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "bolt/Passes/ReorderFunctions.h" +#include "bolt/Passes/Golang.h" #include "bolt/Passes/HFSort.h" #include "llvm/Support/CommandLine.h" #include @@ -18,6 +19,7 @@ #define DEBUG_TYPE "hfsort" using namespace llvm; +using namespace bolt; namespace opts { @@ -123,7 +125,7 @@ std::map &BFs) { std::vector FuncAddr(Cg.numNodes()); // Just for computing stats uint64_t TotalSize = 0; - uint32_t Index = 0; + uint32_t Index = FIRST_BF_INDEX; // Set order of hot functions based on clusters. for (const Cluster &Cluster : Clusters) { @@ -290,7 +292,7 @@ case RT_EXEC_COUNT: { std::vector SortedFunctions(BFs.size()); - uint32_t Index = 0; + uint32_t Index = FIRST_BF_INDEX; std::transform(BFs.begin(), BFs.end(), SortedFunctions.begin(), @@ -384,6 +386,27 @@ reorder(std::move(Clusters), BFs); + if (opts::GolangPass != opts::GV_NONE && opts::ReorderFunctions != RT_USER) { + // | 0: runtime.text | + // | sorted functions | + // | -3: unsorted functions | + // | -2: runtime.etext | + // | -1: injected functions(including startup) | + + for (auto &I : BFs) { + auto BF = &I.second; + if (BF->hasRestoredNameRegex(GolangPass::getFirstBFName())) { + BF->setIndex(GO_FIRST_BF_INDEX); + } else if (BF->hasRestoredNameRegex(GolangPass::getLastBFName())) { + BF->setIndex(GO_LAST_BF_INDEX); + } else if (!BF->isGolang()) { + BF->setIndex(INVALID_BF_INDEX); + } else if (!BF->hasValidIndex()) { + BF->setIndex(GO_UNUSED_BF_INDEX); + } + } + } + std::unique_ptr FuncsFile; if (!opts::GenerateFunctionOrderFile.empty()) { FuncsFile = std::make_unique(opts::GenerateFunctionOrderFile, diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -45,21 +45,18 @@ cl::Hidden, cl::cat(BoltOptCategory)); -static cl::opt -SplitFunctions("split-functions", - cl::desc("split functions into hot and cold regions"), - cl::init(SplitFunctions::ST_NONE), - cl::values(clEnumValN(SplitFunctions::ST_NONE, "0", - "do not split any function"), - clEnumValN(SplitFunctions::ST_LARGE, "1", - "in non-relocation mode only split functions too large " - "to fit into original code space"), - clEnumValN(SplitFunctions::ST_LARGE, "2", - "same as 1 (backwards compatibility)"), - clEnumValN(SplitFunctions::ST_ALL, "3", - "split all functions")), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); +cl::opt SplitFunctions( + "split-functions", cl::desc("split functions into hot and cold regions"), + cl::init(SplitFunctions::ST_NONE), + cl::values( + clEnumValN(SplitFunctions::ST_NONE, "0", "do not split any function"), + clEnumValN(SplitFunctions::ST_LARGE, "1", + "in non-relocation mode only split functions too large " + "to fit into original code space"), + clEnumValN(SplitFunctions::ST_LARGE, "2", + "same as 1 (backwards compatibility)"), + clEnumValN(SplitFunctions::ST_ALL, "3", "split all functions")), + cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt SplitThreshold("split-threshold", diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp --- 
a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -13,6 +13,7 @@ #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/CMOVConversion.h" #include "bolt/Passes/FrameOptimizer.h" +#include "bolt/Passes/Golang.h" #include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/IndirectCallPromotion.h" #include "bolt/Passes/Inliner.h" @@ -223,10 +224,10 @@ cl::desc("turn on the stoke analysis"), cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); -static cl::opt -StringOps("inline-memcpy", - cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), - cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); +cl::opt StringOps( + "inline-memcpy", + cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), + cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt StripRepRet("strip-rep-ret", @@ -253,6 +254,11 @@ cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); +cl::opt InstructionsLowering("lower-instructions", + cl::desc("Instructions lowering pass"), + cl::init(true), cl::ZeroOrMore, cl::Hidden, + cl::cat(BoltCategory)); + } // namespace opts namespace llvm { @@ -327,9 +333,6 @@ Manager.registerPass(std::make_unique(), opts::AsmDump.getNumOccurrences()); - if (opts::Instrument) - Manager.registerPass(std::make_unique(NeverPrint)); - // Here we manage dependencies/order manually, since passes are run in the // order they're registered. @@ -339,6 +342,12 @@ if (opts::PrintProfileStats) Manager.registerPass(std::make_unique(NeverPrint)); + if (opts::GolangPass != opts::GV_NONE) + Manager.registerPass(std::make_unique(BC)); + + if (opts::Instrument) + Manager.registerPass(std::make_unique(NeverPrint)); + Manager.registerPass(std::make_unique(NeverPrint)); Manager.registerPass(std::make_unique(NeverPrint)); @@ -444,9 +453,13 @@ // memory profiling data. Manager.registerPass(std::make_unique()); - if (BC.isAArch64()) { + if (BC.isAArch64()) Manager.registerPass(std::make_unique()); + if (opts::GolangPass != opts::GV_NONE) + Manager.registerPass(std::make_unique(BC)); + + if (BC.isAArch64() || opts::GolangPass != opts::GV_NONE) { // Tighten branches according to offset differences between branch and // targets. No extra instructions after this pass, otherwise we may have // relocations out of range and crash during linking. @@ -478,7 +491,8 @@ // function reordering. It's unsafe to use any CFG or instruction analysis // after this point. Manager.registerPass( - std::make_unique(PrintAfterLowering)); + std::make_unique(PrintAfterLowering), + opts::InstructionsLowering); // In non-relocation mode, mark functions that do not fit into their original // space as non-simple if we have to (e.g. for correct debug info update). 
@@ -486,6 +500,9 @@
   if (!BC.HasRelocations)
     Manager.registerPass(std::make_unique(NeverPrint));
 
+  if (opts::GolangPass != opts::GV_NONE)
+    Manager.registerPass(std::make_unique(BC));
+
   Manager.registerPass(std::make_unique(NeverPrint));
 
   Manager.runPasses();
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -15,8 +15,10 @@
 #include "bolt/Core/MCPlusBuilder.h"
 #include "bolt/Core/ParallelUtilities.h"
 #include "bolt/Core/Relocation.h"
+#include "bolt/Passes/BinaryPasses.h"
 #include "bolt/Passes/CacheMetrics.h"
 #include "bolt/Passes/ReorderFunctions.h"
+#include "bolt/Passes/SplitFunctions.h"
 #include "bolt/Profile/BoltAddressTranslation.h"
 #include "bolt/Profile/DataAggregator.h"
 #include "bolt/Profile/DataReader.h"
@@ -79,6 +81,15 @@
 extern cl::list<std::string> ReorderData;
 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
 extern cl::opt<bool> TimeBuild;
+extern cl::opt<bool> PreserveBlocksAlignment;
+extern cl::opt<bool> AlignBlocks;
+extern cl::opt<bool> NoInline;
+extern cl::opt<bool> StringOps;
+extern cl::opt<FrameOptimizationType> FrameOptimization;
+extern cl::opt<bolt::SplitFunctions::SplittingType> SplitFunctions;
+extern llvm::cl::opt<bool> InsertRetpolines;
+extern cl::opt<bool> InstructionsLowering;
+extern cl::list<bolt::Peepholes::PeepholeOpts> Peepholes;
 
 static cl::opt<bool>
     ForceToDataRelocations("force-data-relocations",
@@ -1730,6 +1741,59 @@
     opts::HotTextMoveSections.addValue(".never_hugify");
   }
 
+  if ((BC->isAArch64() || opts::GolangPass != opts::GV_NONE) &&
+      (opts::AlignBlocks || opts::PreserveBlocksAlignment)) {
+    errs() << "BOLT-WARNING: Disabling block alignment\n";
+    opts::AlignBlocks = false;
+    opts::PreserveBlocksAlignment = false;
+  }
+
+  if (opts::GolangPass != opts::GV_NONE) {
+    // Golang does not support inlining
+    opts::NoInline = true;
+    opts::StringOps = false;
+
+    // Instructions should not be lowered for golang
+    opts::InstructionsLowering = false;
+
+    // Enable double jump elimination. Currently golang uses double jumps
+    // heavily and this option is needed to preserve pcsp table values
+    // correctly.
+    opts::Peepholes.push_back(Peepholes::PEEP_DOUBLE_JUMPS);
+
+    if (opts::FrameOptimization != FOP_NONE) {
+      errs() << "BOLT-WARNING: Golang does not support frame optimizations\n";
+      opts::FrameOptimization = FOP_NONE;
+    }
+
+    if (opts::SplitFunctions != SplitFunctions::ST_NONE) {
+      errs() << "BOLT-WARNING: Golang does not support function splitting\n";
+      opts::SplitFunctions = SplitFunctions::ST_NONE;
+    }
+
+    if (opts::UseOldText) {
+      errs() << "BOLT-WARNING: Cannot combine -use-old-text and -golang\n";
+      opts::UseOldText = false;
+    }
+
+    if (opts::Lite) {
+      errs() << "BOLT-WARNING: Lite mode is not compatible with -golang. "
+                "Disabling.\n";
+      opts::Lite = false;
+    }
+
+    if (opts::HotFunctionsAtEnd) {
+      errs() << "BOLT-WARNING: Golang does not support hot functions at end. "
+                "Disabling.\n";
+      opts::HotFunctionsAtEnd = false;
+    }
+
+    if (opts::InsertRetpolines) {
+      errs() << "BOLT-WARNING: Retpoline pass is not compatible with -golang. 
" + "Disabling.\n"; + opts::InsertRetpolines = false; + } + } + if (opts::UseOldText && !BC->OldTextSectionAddress) { errs() << "BOLT-WARNING: cannot use old .text as the section was not found" "\n"; @@ -1747,7 +1811,7 @@ opts::AlignText = (unsigned)opts::AlignFunctions; if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode && - !opts::UseOldText) + !opts::UseOldText && opts::GolangPass == opts::GV_NONE) opts::Lite = true; if (opts::Lite && opts::UseOldText) { @@ -2853,6 +2917,10 @@ void RewriteInstance::disassembleFunctions() { NamedRegionTimer T("disassembleFunctions", "disassemble functions", TimerGroupName, TimerGroupDesc, opts::TimeRewrite); + // Create annotation indices to allow lock-free execution + BC->MIB->getOrCreateAnnotationIndex("Size"); + BC->MIB->getOrCreateAnnotationIndex("Locked"); + for (auto &BFI : BC->getBinaryFunctions()) { BinaryFunction &Function = BFI.second; @@ -3658,6 +3726,9 @@ if (!AllocationDone) NextAvailableAddress = allocateAt(NextAvailableAddress); + NextAvailableAddress = alignTo(NextAvailableAddress, BC->RegularPageSize); + NewExtraSegmentAddress = NextAvailableAddress; + // Do the mapping for ORC layer based on the allocation. for (BinarySection *Section : CodeSections) { LLVM_DEBUG( @@ -3780,6 +3851,10 @@ std::vector Sections = { ".eh_frame", Twine(getOrgSecPrefix(), ".eh_frame").str(), ".gcc_except_table", ".rodata", ".rodata.cold"}; + + Sections.reserve(Sections.size() + BC->ExtraSectionsNames.size()); + Sections.insert(Sections.end(), BC->ExtraSectionsNames.begin(), + BC->ExtraSectionsNames.end()); if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) RtLibrary->addRuntimeLibSections(Sections); @@ -3881,12 +3956,22 @@ // Writing new pheader table. Phnum += 1; // only adding one new segment // Segment size includes the size of the PHDR area. - NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress; + if (BC->ExtraSectionsNames.size()) { + Phnum += 1; + NewTextSegmentSize = NewExtraSegmentAddress - PHDRTableAddress; + NewExtraSegmentSize = NextAvailableAddress - NewExtraSegmentAddress; + } else { + NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress; + } } else { assert(!PHDRTableAddress && "unexpected address for program header table"); // Update existing table. PHDRTableOffset = Obj.getHeader().e_phoff; NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress; + if (BC->ExtraSectionsNames.size()) { + errs() << "Could not use additional sections with UseGnuStack option\n"; + exit(1); + } } OS.seek(PHDRTableOffset); @@ -3919,6 +4004,28 @@ return NewPhdr; }; + auto createNewExtraSectionsPhdr = [&](unsigned ELFFlags) { + ELF64LEPhdrTy NewPhdr; + NewPhdr.p_type = ELF::PT_LOAD; + NewPhdr.p_offset = getFileOffsetForAddress(NewExtraSegmentAddress); + NewPhdr.p_vaddr = NewExtraSegmentAddress; + NewPhdr.p_paddr = NewExtraSegmentAddress; + NewPhdr.p_filesz = NewExtraSegmentSize; + NewPhdr.p_memsz = NewExtraSegmentSize; + NewPhdr.p_align = BC->RegularPageSize; + + NewPhdr.p_flags = ELF::PF_R; + if (ELFFlags & ELF::SHF_WRITE) + NewPhdr.p_flags |= ELF::PF_W; + + // FIXME: Currently instrumentation is experimental and the runtime data + // is emitted with code, thus everything needs to be writable + if (ELFFlags & ELF::SHF_EXECINSTR || opts::Instrument) + NewPhdr.p_flags |= ELF::PF_X; + + return NewPhdr; + }; + // Copy existing program headers with modifications. 
for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) { ELF64LE::Phdr NewPhdr = Phdr; @@ -3942,16 +4049,30 @@ } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) { NewPhdr = createNewTextPhdr(); ModdedGnuStack = true; - } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) { - // Insert the new header before DYNAMIC. - ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); - OS.write(reinterpret_cast(&NewTextPhdr), - sizeof(NewTextPhdr)); - AddedSegment = true; } + OS.write(reinterpret_cast(&NewPhdr), sizeof(NewPhdr)); } + if (!opts::UseGnuStack) { + ELF64LEPhdrTy NewTextPhdr = createNewTextPhdr(); + OS.write(reinterpret_cast(&NewTextPhdr), sizeof(NewTextPhdr)); + if (BC->ExtraSectionsNames.size()) { + unsigned ELFFlags = 0; + for (const auto &SectionName : BC->ExtraSectionsNames) { + ErrorOr Section = + BC->getUniqueSectionByName(SectionName); + ELFFlags |= Section->getELFFlags(); + } + + ELF64LEPhdrTy NewExtraPhdr = createNewExtraSectionsPhdr(ELFFlags); + OS.write(reinterpret_cast(&NewExtraPhdr), + sizeof(NewExtraPhdr)); + } + + AddedSegment = true; + } + if (!opts::UseGnuStack && !AddedSegment) { // Append the new header to the end of the table. ELF64LE::Phdr NewTextPhdr = createNewTextPhdr(); @@ -4640,9 +4761,10 @@ : Function->getCodeSection()->getIndex(); } else { // Check if the symbol belongs to moved data object and update it. - BinaryData *BD = opts::ReorderData.empty() - ? nullptr - : BC->getBinaryDataAtAddress(Symbol.st_value); + BinaryData *BD = + opts::ForceToDataRelocations || !opts::ReorderData.empty() + ? BC->getBinaryDataAtAddress(Symbol.st_value) + : nullptr; if (BD && BD->isMoved() && !BD->isJumpTable()) { assert((!BD->getSize() || !Symbol.st_size || Symbol.st_size == BD->getSize()) && @@ -4657,6 +4779,7 @@ << " (" << OutputSection.getIndex() << ")\n"); NewSymbol.st_shndx = OutputSection.getIndex(); NewSymbol.st_value = BD->getOutputAddress(); + NewSymbol.st_size = BD->getOutputSize(); } else { // Otherwise just update the section for the symbol. if (Symbol.st_shndx < ELF::SHN_LORESERVE) @@ -4776,6 +4899,23 @@ addSymbol("__hot_data_end"); } + auto addSectionSymbol = [&](uint64_t Address, unsigned index) { + ELFSymTy Symbol; + Symbol.st_value = Address; + Symbol.st_shndx = index; + Symbol.st_name = AddToStrTab(""); + Symbol.st_size = 0; + Symbol.st_other = 0; + Symbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_SECTION); + + Symbols.insert(Symbols.begin() + index, Symbol); + }; + + for (const auto &SectionName : BC->ExtraSectionsNames) { + ErrorOr section = BC->getUniqueSectionByName(SectionName); + addSectionSymbol(section->getOutputAddress(), section->getIndex()); + } + // Put local symbols at the beginning. std::stable_sort(Symbols.begin(), Symbols.end(), [](const ELFSymTy &A, const ELFSymTy &B) { @@ -4938,12 +5078,18 @@ uint64_t SectionAddress = Section.getOutputAddress(); SectionAddress = SectionAddress == 0 ? 
Section.getAddress() : SectionAddress; - MCSymbol *Symbol = Rel.Symbol; uint32_t SymbolIdx = 0; uint64_t Addend = Rel.Addend; if (Rel.Symbol) { - SymbolIdx = getOutputDynamicSymbolIndex(Symbol); + if (!IsRelative && !Rel.isIRelative()) { + SymbolIdx = getOutputDynamicSymbolIndex(Rel.Symbol); + } else { + // The R_*_(I)RELATIVE relocation inserted by BOLT + ErrorOr Address = BC->getSymbolValue(*Rel.Symbol); + if (Address) + Addend += getNewFunctionOrDataAddress(*Address); + } } else { // Usually this case is used for R_*_(I)RELATIVE relocations const uint64_t Address = getNewFunctionOrDataAddress(Addend); diff --git a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp --- a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp +++ b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp @@ -57,7 +57,8 @@ "the input binary\n"; exit(1); } - if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) { + if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable && + opts::GolangPass == opts::GV_NONE) { errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic " "section but instrumentation currently relies on patching " "DT_FINI to write the profile\n"; @@ -180,6 +181,8 @@ emitString("__bolt_instr_filename", opts::InstrumentationFilename); emitString("__bolt_instr_binpath", opts::InstrumentationBinpath); emitIntValue("__bolt_instr_use_pid", !!opts::InstrumentationFileAppendPID, 1); + emitValue(BC.Ctx->getOrCreateSymbol("__bolt_trampoline_instr_fini_call"), + nullptr); if (BC.isMachO()) { MCSection *TablesSection = BC.Ctx->getMachOSection( diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -879,6 +879,11 @@ return true; } + bool relaxInstruction(MCInst &Inst) const override { + (void)Inst; + return false; + } + bool lowerTailCall(MCInst &Inst) override { removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall); if (getConditionalTailCall(Inst)) diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -60,6 +60,21 @@ } } +unsigned getLongBranchOpcode(unsigned Opcode) { + switch (Opcode) { + default: + return Opcode; + case X86::JMP_1: + return X86::JMP_4; + case X86::JMP_2: + return X86::JMP_4; + case X86::JCC_1: + return X86::JCC_4; + case X86::JCC_2: + return X86::JCC_4; + } +} + unsigned getShortArithOpcode(unsigned Opcode) { return X86::getShortOpcodeArith(Opcode); } @@ -1224,6 +1239,42 @@ } } + int getStackAdjustment(const MCInst &Inst) const override { + int Ret; + + if ((Ret = getPushSize(Inst))) { + return Ret; + } else if ((Ret = getPopSize(Inst))) { + return -Ret; + } + + const int ValOp = 2; + int Sign = 1; + + switch (Inst.getOpcode()) { + default: + return 0; + case X86::SUB64ri32: + case X86::SUB64ri8: + break; + case X86::ADD64ri32: + case X86::ADD64ri8: + Sign = -1; + break; + } + + const MCInstrDesc &MCII = Info->get(Inst.getOpcode()); + for (int I = 0, E = MCII.getNumDefs(); I != E; ++I) { + const MCOperand &Operand = Inst.getOperand(I); + if (Operand.isReg() && Operand.getReg() == X86::RSP) { + assert(Inst.getOperand(ValOp).isImm() && "unexpected operand"); + return (int)Inst.getOperand(ValOp).getImm() * Sign; + } + } + + return 0; + } + bool isStackAdjustment(const MCInst &Inst) const override { switch 
(Inst.getOpcode()) { default: @@ -1828,6 +1879,8 @@ if (int64_t(Imm) == int64_t(int32_t(Imm))) NewOpcode = X86::MOV64ri32; } + } else if (isNoop(Inst) && hasAnnotation(Inst, "NOP")) { + NewOpcode = X86::NOOP; } else { // If it's arithmetic instruction check if signed operand fits in 1 byte. const unsigned ShortOpcode = getShortArithOpcode(OldOpcode); @@ -1911,6 +1964,21 @@ return true; } + bool relaxInstruction(MCInst &Inst) const override { + unsigned OldOpcode = Inst.getOpcode(); + unsigned NewOpcode = OldOpcode; + + if (isBranch(Inst) || isTailCall(Inst)) { + NewOpcode = getLongBranchOpcode(OldOpcode); + } + + if (NewOpcode == OldOpcode) + return false; + + Inst.setOpcode(NewOpcode); + return true; + } + bool lowerTailCall(MCInst &Inst) override { if (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst)) { Inst.setOpcode(X86::JMP_1); @@ -1953,7 +2021,7 @@ --I; // Ignore nops and CFIs - if (isPseudo(*I)) + if (isPseudo(*I) || isNoop(*I)) continue; // Stop when we find the first non-terminator @@ -2501,6 +2569,7 @@ } bool createNoop(MCInst &Inst) const override { + Inst.clear(); Inst.setOpcode(X86::NOOP); return true; } @@ -2893,6 +2962,8 @@ return true; } + int getUncondBranchEncodingSize() const override { return 8; } + bool createCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx) override { Inst.setOpcode(X86::CALL64pcrel32); @@ -2901,6 +2972,35 @@ return true; } + int getShortJmpEncodingSize() const override { return 8; } + + int getPCRelEncodingSize(const MCInst &Inst) const override { + switch (Inst.getOpcode()) { + default: + llvm_unreachable("Failed to get pcrel encoding size"); + return 0; + case X86::JMP_1: + case X86::JCC_1: + return 8; + case X86::JMP_2: + case X86::JCC_2: + case X86::JMP16m: + case X86::JMP16r: + return 16; + case X86::JMP_4: + case X86::JCC_4: + case X86::JMP32m: + case X86::JMP32r: + case X86::CALL64pcrel32: + return 32; + case X86::JMP64m: + case X86::JMP64r: + case X86::CALL64m: + case X86::CALL64r: + return 64; + } + } + bool createTailCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx) override { return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true); @@ -3216,6 +3316,32 @@ Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg } + bool createIndirectCall(MCInst &Inst, const MCSymbol *TargetLocation, + MCContext *Ctx, bool IsTailCall) { + Inst.setOpcode(IsTailCall ? 
X86::JMP32m : X86::CALL64m);
+    Inst.addOperand(MCOperand::createReg(X86::RIP));        // BaseReg
+    Inst.addOperand(MCOperand::createImm(1));               // ScaleAmt
+    Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
+    Inst.addOperand(MCOperand::createExpr(                  // Displacement
+        MCSymbolRefExpr::create(TargetLocation, MCSymbolRefExpr::VK_None,
+                                *Ctx)));
+    Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
+    if (IsTailCall)
+      setTailCall(Inst);
+    return true;
+  }
+
+  InstructionListType createInstrumentFiniCall(MCSymbol *HandlerFuncAddr,
+                                               MCContext *Ctx,
+                                               bool IsTailCall) override {
+    // NOTE: We don't have to check HandlerFuncAddr content for 0 before the
+    // call at runtime since Golang doesn't call any constructors
+    std::vector<MCInst> Insts(1);
+    createIndirectCall(Insts[0], HandlerFuncAddr, Ctx, IsTailCall);
+    addAnnotation(Insts[0], "IsInstrumentation", true);
+    return Insts;
+  }
+
   InstructionListType createInstrumentedIndirectCall(const MCInst &CallInst,
                                                      bool TailCall,
                                                      MCSymbol *HandlerFuncAddr,
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -205,6 +205,20 @@
     cl::cat(BoltCategory),
     cl::sub(*cl::AllSubCommands));
 
+cl::opt<opts::GolangVersion> GolangPass(
+    "golang",
+    cl::desc("Use for input binaries generated with the golang gc compiler"),
+    cl::init(opts::GV_NONE),
+    cl::values(
+        clEnumValN(opts::GV_NONE, "0", "do not use golang optimizations"),
+        clEnumValN(opts::GV_AUTO, "1", "auto detect golang version"),
+        clEnumValN(opts::GV_1_14_9, "1.14.9", "set gc version to 1.14.9"),
+        clEnumValN(opts::GV_1_14_12, "1.14.12", "set gc version to 1.14.12"),
+        clEnumValN(opts::GV_1_16_5, "1.16.5", "set gc version to 1.16.5"),
+        clEnumValN(opts::GV_1_17_2, "1.17.2", "set gc version to 1.17.2"),
+        clEnumValN(opts::GV_1_17_5, "1.17.5", "set gc version to 1.17.5")),
+    cl::Optional, cl::cat(BoltOptCategory));
+
 bool processAllFunctions() {
   if (opts::AggregateOnly)
     return false;
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
--- a/bolt/runtime/instr.cpp
+++ b/bolt/runtime/instr.cpp
@@ -106,6 +106,8 @@
 // TODO: We need better linking support to make that happen.
 extern void (*__bolt_ind_call_counter_func_pointer)();
 extern void (*__bolt_ind_tailcall_counter_func_pointer)();
+// Function pointer to __bolt_instr_fini
+extern void (*__bolt_trampoline_instr_fini_call)();
 // Function pointers to init/fini trampoline routines in the binary, so we can
 // resume regular execution of these functions that we hooked
 extern void __bolt_start_trampoline();
@@ -1541,6 +1543,7 @@
 
 extern "C" void __bolt_instr_indirect_call();
 extern "C" void __bolt_instr_indirect_tailcall();
+extern "C" void __bolt_instr_fini();
 
 /// Initialization code
 extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
@@ -1560,6 +1563,8 @@
   __bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call;
   __bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall;
 
+  __bolt_trampoline_instr_fini_call = __bolt_instr_fini;
+
   // Conservatively reserve 100MiB shared pages
   GlobalAlloc.setMaxSize(0x6400000);
   GlobalAlloc.setShared(true);