Index: include/llvm/CodeGen/AsmPrinter.h =================================================================== --- include/llvm/CodeGen/AsmPrinter.h +++ include/llvm/CodeGen/AsmPrinter.h @@ -99,6 +99,11 @@ /// default, this is equal to CurrentFnSym. MCSymbol *CurrentFnSymForSize; + /// Map global GOT equivalent MCSymbols to GlobalVariables and keep track of + /// its number of uses by other globals. + typedef std::pair GOTEquivUsePair; + DenseMap GlobalGOTEquivs; + private: // The garbage collection metadata printer table. void *GCMetadataPrinters; // Really a DenseMap. @@ -244,6 +249,21 @@ /// \brief Print a general LLVM constant to the .s file. void EmitGlobalConstant(const Constant *CV); + /// \brief Unnamed constant global variables solely containing a pointer to + /// another global variable act like a global variable "proxy", or GOT + /// equivalents, i.e., it's only used to hold the address of the latter. One + /// optimization is to replace accesses to these proxies by using the GOT + /// entry for the final global instead. Hence, we select GOT equivalent + /// candidates among all the module global variables, avoid emitting them + /// unnecessarily and finally replace references to them by pc relative + /// accesses to GOT entries. + void computeGlobalGOTEquivs(Module &M); + + /// \brief Constant expressions using GOT equivalent globals may not be + /// eligible for PC relative GOT entry conversion, in such cases we need to + /// emit the proxies we previously omitted in EmitGlobalVariable. 
+ void emitGlobalGOTEquivs(); + //===------------------------------------------------------------------===// // Overridable Hooks //===------------------------------------------------------------------===// Index: include/llvm/Target/TargetLoweringObjectFile.h =================================================================== --- include/llvm/Target/TargetLoweringObjectFile.h +++ include/llvm/Target/TargetLoweringObjectFile.h @@ -41,10 +41,14 @@ const TargetLoweringObjectFile&) LLVM_DELETED_FUNCTION; void operator=(const TargetLoweringObjectFile&) LLVM_DELETED_FUNCTION; +protected: + bool SupportIndirectSymViaGOTPCRel; + public: MCContext &getContext() const { return *Ctx; } - TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(nullptr), DL(nullptr) {} + TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(nullptr), DL(nullptr), + SupportIndirectSymViaGOTPCRel(false) {} virtual ~TargetLoweringObjectFile(); @@ -151,6 +155,18 @@ return nullptr; } + /// \brief Target supports replacing a data "PC"-relative access to a symbol + /// through another symbol, by accessing the latter via a GOT entry instead? 
+ bool supportIndirectSymViaGOTPCRel() const { + return SupportIndirectSymViaGOTPCRel; + } + + /// \brief Get the target specific PC relative GOT entry relocation + virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + int64_t Offset) const { + return nullptr; + } + protected: virtual const MCSection * SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -41,6 +41,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" @@ -336,6 +337,11 @@ if (EmitSpecialLLVMGlobal(GV)) return; + // Skip the emission of global equivalents. The symbol can be emitted later + // on by emitGlobalGOTEquivs in case it turns out to be needed. + if (GlobalGOTEquivs.count(getSymbol(GV))) + return; + if (isVerbose()) { GV->printAsOperand(OutStreamer.GetCommentOS(), /*PrintType=*/false, GV->getParent()); @@ -892,11 +898,95 @@ OutStreamer.AddBlankLine(); } +/// \brief Compute the number of Global Variables that use a Constant. +static unsigned getNumGlobalVariableUses(const Constant *C) { + if (!C) + return 0; + + if (isa(C)) + return 1; + + unsigned NumUses = 0; + for (auto *CU : C->users()) + NumUses += getNumGlobalVariableUses(dyn_cast(CU)); + + return NumUses; +} + +/// \brief Only consider global GOT equivalents if at least one user is a +/// cstexpr inside an initializer of another global variable. Also, don't +/// handle cstexpr inside instructions. During global variable emission, +/// candidates are skipped and are emitted later in case at least one cstexpr +/// isn't replaced by a PC relative GOT entry access. 
+static bool isGOTEquivalentCandidate(const GlobalVariable *GV, + unsigned &NumGOTEquivUsers) { + // Global GOT equivalents are unnamed private globals with a constant + // pointer initializer to another global symbol. They must point to a + // GlobalVariable or Function, i.e., a GlobalValue. + if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() || + !GV->isDiscardableIfUnused() || !dyn_cast(GV->getOperand(0))) + return false; + + // To be a GOT equivalent, at least one of its users needs to be a constant + // expression used by another global variable. + for (auto *U : GV->users()) + NumGOTEquivUsers += getNumGlobalVariableUses(cast(U)); + + return NumGOTEquivUsers > 0; +} + +/// \brief Unnamed constant global variables solely containing a pointer to +/// another global variable is equivalent to a GOT table entry; it contains the +/// address of another symbol. Optimize it and replace accesses to these +/// "GOT equivalents" by using the GOT entry for the final global instead. +/// Compute GOT equivalent candidates among all global variables to avoid +/// emitting them if possible later on, after its use is replaced by a GOT entry +/// access. +void AsmPrinter::computeGlobalGOTEquivs(Module &M) { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + for (const auto &G : M.globals()) { + unsigned NumGOTEquivUsers = 0; + if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers)) + continue; + + const MCSymbol *GOTEquivSym = getSymbol(&G); + GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers); + } +} + +/// \brief Constant expressions using GOT equivalent globals may not be eligible +/// for PC relative GOT entry conversion, in such cases we need to emit such +/// globals we previously omitted in EmitGlobalVariable. 
+void AsmPrinter::emitGlobalGOTEquivs() { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + while (!GlobalGOTEquivs.empty()) { + DenseMap::iterator I = + GlobalGOTEquivs.begin(); + const MCSymbol *S = I->first; + const GlobalVariable *GV = I->second.first; + GlobalGOTEquivs.erase(S); + EmitGlobalVariable(GV); + } +} + bool AsmPrinter::doFinalization(Module &M) { + // Gather all GOT equivalent globals in the module. We really need two + // passes over the globals: one to compute and another to avoid its emission + // in EmitGlobalVariable, otherwise we would not be able to handle cases + // where the got equivalent shows up before its use. + computeGlobalGOTEquivs(M); + // Emit global variables. for (const auto &G : M.globals()) EmitGlobalVariable(&G); + // Emit remaining GOT equivalent globals. + emitGlobalGOTEquivs(); + // Emit visibility info for declarations for (const Function &F : M) { if (!F.isDeclaration()) @@ -1686,7 +1776,9 @@ } } -static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP); +static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP, + const Constant *BaseCV = nullptr, + uint64_t Offset = 0); /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the @@ -1815,20 +1907,22 @@ } -static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) { +static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
int Value = isRepeatedByteSequence(CA, AP.TM); + const DataLayout &DL = *AP.TM.getDataLayout(); if (Value != -1) { - uint64_t Bytes = - AP.TM.getDataLayout()->getTypeAllocSize( - CA->getType()); + uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); AP.OutStreamer.EmitFill(Bytes, Value); } else { - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - emitGlobalConstantImpl(CA->getOperand(i), AP); + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset); + Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } } } @@ -1844,7 +1938,8 @@ AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { +static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { // Print the fields in successive locations. Pad to align if needed! const DataLayout *DL = AP.TM.getDataLayout(); unsigned Size = DL->getTypeAllocSize(CS->getType()); @@ -1853,15 +1948,15 @@ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); + // Print the actual field value. + emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar); + // Check if padding is needed and insert one or more 0s. uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; - // Now print the actual field value. - emitGlobalConstantImpl(Field, AP); - // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. 
@@ -1977,9 +2072,100 @@ } } -static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { +/// \brief Transform a not absolute MCExpr containing a reference to a GOT +/// equivalent global, by a target specific GOT pc relative access to the +/// final symbol. +static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, + const Constant *BaseCst, + uint64_t Offset) { + // The global @foo below illustrates a global that uses a got equivalent. + // + // @bar = global i32 42 + // @gotequiv = private unnamed_addr constant i32* @bar + // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64), + // i64 ptrtoint (i32* @foo to i64)) + // to i32) + // + // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually + // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the + // form: + // + // foo = cstexpr, where + // cstexpr := <gotequiv> - "." + <cst> + // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst> + // + // After canonicalization by EvaluateAsRelocatable `ME` turns into: + // + // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where + // gotpcrelcst := <offset from @foo base> + <cst> + // + MCValue MV; + if (!(*ME)->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) + return; + + const MCSymbol *GOTEquivSym = &MV.getSymA()->getSymbol(); + if (!AP.GlobalGOTEquivs.count(GOTEquivSym)) + return; + + const GlobalValue *BaseGV = dyn_cast(BaseCst); + if (!BaseGV) + return; + + const MCSymbol *BaseSym = AP.getSymbol(BaseGV); + if (BaseSym != &MV.getSymB()->getSymbol()) + return; + + // Make sure to match: + // + // gotpcrelcst := <offset from @foo base> + <cst> + // + int64_t GOTPCRelCst = Offset + MV.getConstant(); + if (GOTPCRelCst < 0) + return; + + // Emit the GOT PC relative to replace the got equivalent global, i.e.: + // + // bar: + // .long 42 + // gotequiv: + // .quad bar + // foo: + // .long gotequiv - "." 
+ + // // is replaced by the target specific equivalent to: + // + // bar: + // .long 42 + // foo: + // .long bar@GOTPCREL+<gotpcrelcst> + // + AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym]; + const GlobalVariable *GV = Result.first; + unsigned NumUses = Result.second; + const GlobalValue *FinalGV = dyn_cast(GV->getOperand(0)); + const MCSymbol *FinalSym = AP.getSymbol(FinalGV); + *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym, + GOTPCRelCst); + + // Update GOT equivalent usage information + --NumUses; + if (NumUses) + AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); + else + AP.GlobalGOTEquivs.erase(GOTEquivSym); +} + +static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { const DataLayout *DL = AP.TM.getDataLayout(); uint64_t Size = DL->getTypeAllocSize(CV->getType()); + + // Globals with sub-elements such as combinations of arrays and structs + // are handled recursively by emitGlobalConstantImpl. Keep track of the + // constant symbol base and the current position with BaseCV and Offset. + if (!BaseCV && CV->hasOneUse()) + BaseCV = dyn_cast(CV->user_back()); + if (isa(CV) || isa(CV)) return AP.OutStreamer.EmitZeros(Size); @@ -2012,10 +2198,10 @@ return emitGlobalConstantDataSequential(CDS, AP); if (const ConstantArray *CVA = dyn_cast(CV)) - return emitGlobalConstantArray(CVA, AP); + return emitGlobalConstantArray(CVA, AP, BaseCV, Offset); if (const ConstantStruct *CVS = dyn_cast(CV)) - return emitGlobalConstantStruct(CVS, AP); + return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset); if (const ConstantExpr *CE = dyn_cast(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of @@ -2038,7 +2224,15 @@ // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. 
- AP.OutStreamer.EmitValue(AP.lowerConstant(CV), Size); + const MCExpr *ME = AP.lowerConstant(CV); + + // Since lowerConstant already folded and got rid of all IR pointer and + // integer casts, detect GOT equivalent accesses by looking into the MCExpr + // directly. + if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel()) + handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset); + + AP.OutStreamer.EmitValue(ME, Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. Index: lib/Target/X86/X86TargetObjectFile.h =================================================================== --- lib/Target/X86/X86TargetObjectFile.h +++ lib/Target/X86/X86TargetObjectFile.h @@ -19,6 +19,8 @@ /// x86-64. class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: + X86_64MachoTargetObjectFile(); + const MCExpr * getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, @@ -30,6 +32,10 @@ MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const override; + + const MCExpr * + getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + int64_t Offset) const override; }; /// X86LinuxTargetObjectFile - This implementation is used for linux x86 Index: lib/Target/X86/X86TargetObjectFile.cpp =================================================================== --- lib/Target/X86/X86TargetObjectFile.cpp +++ lib/Target/X86/X86TargetObjectFile.cpp @@ -21,6 +21,11 @@ using namespace llvm; using namespace dwarf; +X86_64MachoTargetObjectFile::X86_64MachoTargetObjectFile() + : TargetLoweringObjectFileMachO() { + SupportIndirectSymViaGOTPCRel = true; +} + const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, @@ -46,6 +51,17 @@ return TM.getSymbol(GV, Mang); } +const MCExpr 
*X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel( + const MCSymbol *Sym, int64_t Offset) const { + // On Darwin/X86-64, we need to use foo@GOTPCREL+4 to access the GOT entry + // from a data section. In case there's an additional offset, then use + // foo@GOTPCREL+4+<offset>. + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); + const MCExpr *Off = MCConstantExpr::Create(Offset+4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Off, getContext()); +} + void X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); Index: test/MC/X86/cstexpr-gotpcrel.ll =================================================================== --- /dev/null +++ test/MC/X86/cstexpr-gotpcrel.ll @@ -0,0 +1,84 @@ +; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s + +; GOT equivalent globals references can be replaced by the GOT entry of the +; final symbol instead. Test: +; +; (1) GOT equivalent usage inside nested constant arrays. +; (2) Multiple uses of GOT equivalents. +; (3) Don't replace GOT equivalent usage within instructions. +; (4) Emit the GOT equivalent in case one of its uses can't be replaced by the +; GOT entry. +; (5) Support for arbitrary constants into the GOTPCREL offset +; + +%struct.data = type { i32, %struct.anon } +%struct.anon = type { i32, i32 } + +; Check that these got equivalent symbols are not emitted in the beginning. 
+; CHECK-NOT: _localgotequiv +; CHECK-NOT: _extgotequiv +@localfoo = global i32 42 +@localgotequiv = private unnamed_addr constant i32* @localfoo + +@extfoo = external global i32 +@extgotequiv = private unnamed_addr constant i32* @extfoo + +; CHECK: l_bargotequiv: +; CHECK-NEXT: .quad _extbar +@extbar = external global i32 +@bargotequiv = private unnamed_addr constant i32* @extbar + + +@table = global [4 x %struct.data] [ +; CHECK-LABEL: _table + %struct.data { i32 1, %struct.anon { i32 2, i32 3 } }, +; CHECK: .long 5 +; CHECK-NOT: .long _localgotequiv-(_table+20) +; CHECK-NEXT: .long _localfoo@GOTPCREL+4 + %struct.data { i32 4, %struct.anon { i32 5, + i32 trunc (i64 sub (i64 ptrtoint (i32** @localgotequiv to i64), + i64 ptrtoint (i32* getelementptr inbounds ([4 x %struct.data]* @table, i32 0, i64 1, i32 1, i32 1) to i64)) + to i32)} + }, +; CHECK: .long 5 +; CHECK-NOT: _extgotequiv-(_table+32) +; CHECK-NEXT: .long _extfoo@GOTPCREL+4 + %struct.data { i32 4, %struct.anon { i32 5, + i32 trunc (i64 sub (i64 ptrtoint (i32** @extgotequiv to i64), + i64 ptrtoint (i32* getelementptr inbounds ([4 x %struct.data]* @table, i32 0, i64 2, i32 1, i32 1) to i64)) + to i32)} + }, +; CHECK: .long 5 +; CHECK-NOT: _extgotequiv-(_table+44) +; CHECK-NEXT: .long _extfoo@GOTPCREL+28 + %struct.data { i32 4, %struct.anon { i32 5, + i32 add (i32 trunc (i64 sub (i64 ptrtoint (i32** @extgotequiv to i64), + i64 ptrtoint (i32* getelementptr inbounds ([4 x %struct.data]* @table, i32 0, i64 3, i32 1, i32 1) to i64)) + to i32), i32 24)} + } +], align 16 + + +; CHECK-LABEL: _delta +; CHECK: .long _extfoo@GOTPCREL+4 +@delta = global i32 trunc (i64 sub (i64 ptrtoint (i32** @extgotequiv to i64), + i64 ptrtoint (i32* @delta to i64)) + to i32) + +; CHECK-LABEL: _deltaplus: +; CHECK: .long _localfoo@GOTPCREL+59 +@deltaplus = global i32 add (i32 trunc (i64 sub (i64 ptrtoint (i32** @localgotequiv to i64), + i64 ptrtoint (i32* @deltaplus to i64)) + to i32), i32 55) + + +define i32 @t0(i32 %a) { + %x = add 
i32 trunc (i64 sub (i64 ptrtoint (i32** @bargotequiv to i64), + i64 ptrtoint (i32 (i32)* @t0 to i64)) + to i32), %a + ret i32 %x +} + +; Check that these got equivalent symbols are not emitted in the end. +; CHECK-NOT: _localgotequiv +; CHECK-NOT: _extgotequiv