Index: include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- include/llvm/CodeGen/AsmPrinter.h
+++ include/llvm/CodeGen/AsmPrinter.h
@@ -99,6 +99,11 @@
   /// default, this is equal to CurrentFnSym.
   MCSymbol *CurrentFnSymForSize;
 
+  /// Map GOT equivalent MCSymbols to their GlobalVariables and keep track of
+  /// the number of uses each one still has by other globals.
+  typedef std::pair<const GlobalVariable *, unsigned> GOTEquivUsePair;
+  DenseMap<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
+
 private:
   // The garbage collection metadata printer table.
   void *GCMetadataPrinters; // Really a DenseMap.
@@ -248,6 +253,21 @@
   /// \brief Print a general LLVM constant to the .s file.
   void EmitGlobalConstant(const Constant *CV);
 
+  /// \brief Unnamed constant global variables solely containing a pointer to
+  /// another global variable act like a global variable "proxy", or GOT
+  /// equivalents, i.e., they are only used to hold the address of the latter.
+  /// One (very) minor optimization is to replace accesses to these proxies by
+  /// using the GOT entry for the final global instead. Hence, we select GOT
+  /// equivalent candidates among all the module global variables, avoid
+  /// emitting them unnecessarily and finally replace references to them by pc
+  /// relative accesses to GOT entries.
+  void computeGlobalGOTEquivs(Module &M);
+
+  /// \brief Constant expressions using GOT equivalent globals may not be
+  /// eligible for PC relative GOT entry conversion, in such cases we need to
+  /// emit the proxies we previously omitted in EmitGlobalVariable.
+  void emitGlobalGOTEquivs();
+
   //===------------------------------------------------------------------===//
   // Overridable Hooks
   //===------------------------------------------------------------------===//
Index: include/llvm/Target/TargetLoweringObjectFile.h
===================================================================
--- include/llvm/Target/TargetLoweringObjectFile.h
+++ include/llvm/Target/TargetLoweringObjectFile.h
@@ -41,10 +41,14 @@
     const TargetLoweringObjectFile&) LLVM_DELETED_FUNCTION;
   void operator=(const TargetLoweringObjectFile&) LLVM_DELETED_FUNCTION;
 
+protected:
+  bool SupportIndirectSymViaGOTPCRel;
+
 public:
   MCContext &getContext() const { return *Ctx; }
 
-  TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(nullptr), DL(nullptr) {}
+  TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(nullptr), DL(nullptr),
+    SupportIndirectSymViaGOTPCRel(false) {}
 
   virtual ~TargetLoweringObjectFile();
 
@@ -151,6 +155,19 @@
     return nullptr;
   }
 
+  /// \brief True if a data "PC"-relative reference to a symbol that only holds
+  /// another symbol's address can be replaced by a GOT access to the latter.
+  virtual bool supportIndirectSymViaGOTPCRel() const {
+    return SupportIndirectSymViaGOTPCRel;
+  }
+
+  /// \brief Target specific PC relative GOT entry expr; nullptr by default.
+  virtual const MCExpr *
+  getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+                            const MCExpr *DotExpr) const {
+    return nullptr;
+  }
+
 protected:
   virtual const MCSection *
   SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -336,6 +336,10 @@
   if (EmitSpecialLLVMGlobal(GV))
     return;
 
+  // Skip the emission of global proxies.
+  if (GlobalGOTEquivs.count(getSymbol(GV)))
+    return;
+
   if (isVerbose()) {
     GV->printAsOperand(OutStreamer.GetCommentOS(),
                        /*PrintType=*/false, GV->getParent());
@@ -892,11 +896,87 @@
   OutStreamer.AddBlankLine();
 }
 
+// Only consider global GOT equivalents if all users are constant expressions
+// inside initializers of other global variables, e.g. we don't handle cstexpr
+// inside instructions. During global variable emission, candidates are skipped
+// and could be emitted later on in case at least one cstexpr isn't replaced by
+// a PC relative GOT entry access.
+static bool isGOTEquivalentCandidate(const GlobalVariable *GV) {
+  // Global GOT equivalents are unnamed private globals with a constant
+  // pointer initializer to another global symbol.
+  if (!GV->hasUnnamedAddr() || !GV->hasInitializer() ||
+      !GV->getNumOperands() || !GV->isDiscardableIfUnused())
+    return false;
+
+  const GlobalVariable *FinalGV = dyn_cast<GlobalVariable>(GV->getOperand(0));
+  if (!FinalGV)
+    return false;
+
+  // A GlobalVariable is always a Constant, so only its type is left to check.
+  if (!FinalGV->getType()->isPointerTy())
+    return false;
+
+  // Walk up from each user; a non-Constant user (e.g. an instruction) rejects.
+  for (auto *U : GV->users()) {
+    const Constant *C = dyn_cast<Constant>(U);
+    while (C && C->getNumUses() == 1 && !isa<GlobalVariable>(C))
+      C = dyn_cast<Constant>(C->user_back());
+
+    if (!C || !isa<GlobalVariable>(C))
+      return false;
+  }
+
+  return true;
+}
+
+// Unnamed constant global variables solely containing a pointer to another
+// global variable act like a global variable "proxy", or GOT equivalents,
+// i.e., they are only used to hold the address of the latter. One (very) minor
+// optimization is to replace accesses to these proxies by using the GOT entry
+// for the final global instead. Hence, we select GOT equivalent candidates
+// among all the module global variables, avoid emitting them unnecessarily and
+// finally replace references to them by pc relative accesses to GOT entries.
+void AsmPrinter::computeGlobalGOTEquivs(Module &M) {
+  if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+    return;
+
+  for (const auto &G : M.globals()) {
+    if (!isGOTEquivalentCandidate(&G))
+      continue;
+
+    const MCSymbol *ProxySym = getSymbol(&G);
+    GlobalGOTEquivs[ProxySym] = std::make_pair(&G, G.getNumUses());
+  }
+}
+
+// Constant expressions using GOT equivalent globals may not be eligible for PC
+// relative GOT entry conversion, in such cases we need to emit the proxies we
+// previously omitted in EmitGlobalVariable.
+void AsmPrinter::emitGlobalGOTEquivs() {
+  if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+    return;
+
+  // Erase first: EmitGlobalVariable skips globals still in GlobalGOTEquivs.
+  while (!GlobalGOTEquivs.empty()) {
+    auto I = GlobalGOTEquivs.begin();
+    const MCSymbol *S = I->first;
+    const GlobalVariable *GV = I->second.first;
+    GlobalGOTEquivs.erase(S);
+    EmitGlobalVariable(GV);
+  }
+}
+
 bool AsmPrinter::doFinalization(Module &M) {
+  // Gather all GOT equivalent globals in the module.
+  computeGlobalGOTEquivs(M);
+
   // Emit global variables.
   for (const auto &G : M.globals())
     EmitGlobalVariable(&G);
 
+  // Emit remaining GOT equivalent globals.
+  emitGlobalGOTEquivs();
+
   // Emit visibility info for declarations
   for (const Function &F : M) {
     if (!F.isDeclaration())
@@ -1684,7 +1764,8 @@
   }
 }
 
-static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP);
+static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP,
+                                   uint64_t Offset = 0);
 
 /// isRepeatedByteSequence - Determine whether the given value is
 /// composed of a repeated sequence of identical bytes and return the
@@ -1813,20 +1894,22 @@
 
 }
 
-static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
+static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP,
+                                    uint64_t Offset) {
   // See if we can aggregate some values. Make sure it can be
   // represented as a series of bytes of the constant value.
   int Value = isRepeatedByteSequence(CA, AP.TM);
+  const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
 
   if (Value != -1) {
-    uint64_t Bytes =
-        AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
-            CA->getType());
+    uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
     AP.OutStreamer.EmitFill(Bytes, Value);
   }
   else {
-    for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
-      emitGlobalConstantImpl(CA->getOperand(i), AP);
+    for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+      emitGlobalConstantImpl(CA->getOperand(i), AP, Offset);
+      Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
+    }
   }
 }
 
@@ -1842,7 +1925,8 @@
     AP.OutStreamer.EmitZeros(Padding);
 }
 
-static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
+static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP,
+                                     uint64_t Offset) {
   // Print the fields in successive locations. Pad to align if needed!
   const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
   unsigned Size = DL->getTypeAllocSize(CS->getType());
@@ -1851,15 +1935,15 @@
   for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
     const Constant *Field = CS->getOperand(i);
 
+    // Print the actual field value.
+    emitGlobalConstantImpl(Field, AP, Offset+SizeSoFar);
+
     // Check if padding is needed and insert one or more 0s.
     uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
     uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
                         - Layout->getElementOffset(i)) - FieldSize;
     SizeSoFar += FieldSize + PadSize;
 
-    // Now print the actual field value.
-    emitGlobalConstantImpl(Field, AP);
-
     // Insert padding - this may include padding to increase the size of the
     // current field up to the ABI size (if the struct is not packed) as well
     // as padding to ensure that the next field starts at the right offset.
@@ -1975,7 +2059,72 @@
   }
 }
 
-static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
+static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, uint64_t Offset,
+                                         const MCExpr **ME) {
+  // Rewrite *ME in place if it is a "PC" relative reference to a GOT equivalent
+  // (proxy) global; X is assumed to be the base symbol of the current emission:
+  //
+  //   <proxy> - <X>               requires Offset == 0
+  //   <proxy> - (<X> + <offset>)  requires <offset> == Offset
+  //
+  const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(*ME);
+  if (!BE || BE->getOpcode() != MCBinaryExpr::Sub)
+    return;
+
+  const MCSymbolRefExpr *Proxy = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+  if (!Proxy)
+    return;
+  const MCSymbol *ProxySym = &Proxy->getSymbol();
+  if (!AP.GlobalGOTEquivs.count(ProxySym))
+    return;
+
+  // Check the "." expression, that is, make sure X + offset == ".". If the
+  // current emission offset differs from the one in 'offset', or the RHS has
+  // any other shape, this isn't a "PC" relative delta.
+  const MCExpr *Dot = BE->getRHS();
+  if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(Dot)) {
+    if ((BExpr->getOpcode() != MCBinaryExpr::Add) ||
+        !isa<MCSymbolRefExpr>(BExpr->getLHS()))
+      return;
+
+    const MCConstantExpr *XOff = dyn_cast<MCConstantExpr>(BExpr->getRHS());
+    if (!XOff || (uint64_t)XOff->getValue() != Offset)
+      return;
+  } else if (!isa<MCSymbolRefExpr>(Dot) || Offset != 0) {
+    return;
+  }
+
+  // Emit the GOT PC relative access to replace the proxy usage, i.e., replace:
+  //
+  //   A:
+  //     .long 42
+  //   Proxy:
+  //     .quad A
+  //   X:
+  //     .long Proxy - "."
+  //
+  // with a target specific equivalent to:
+  //
+  //   X:
+  //     .long A@GOTPCREL
+  AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[ProxySym];
+  const GlobalVariable *GV = Result.first;
+  const GlobalVariable *FinalGV = dyn_cast<GlobalVariable>(GV->getOperand(0));
+  const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
+  unsigned NumUses = Result.second;
+  *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym,
+                                                          BE->getRHS());
+
+  // One use was just replaced; drop the proxy once no uses remain.
+  --NumUses;
+  if (NumUses)
+    AP.GlobalGOTEquivs[ProxySym] = std::make_pair(GV, NumUses);
+  else
+    AP.GlobalGOTEquivs.erase(ProxySym);
+}
+
+static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
+                                   uint64_t Offset) {
   const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
   uint64_t Size = DL->getTypeAllocSize(CV->getType());
   if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
@@ -2010,10 +2159,10 @@
     return emitGlobalConstantDataSequential(CDS, AP);
 
   if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
-    return emitGlobalConstantArray(CVA, AP);
+    return emitGlobalConstantArray(CVA, AP, Offset);
 
   if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
-    return emitGlobalConstantStruct(CVS, AP);
+    return emitGlobalConstantStruct(CVS, AP, Offset);
 
   if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
     // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
@@ -2036,7 +2185,15 @@
 
   // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
   // thread the streamer with EmitValue.
-  AP.OutStreamer.EmitValue(AP.lowerConstant(CV), Size);
+  const MCExpr *ME = AP.lowerConstant(CV);
+
+  // Since lowerConstant already folded and got rid of all IR pointer and
+  // integer casts, detect GOT equivalent accesses by looking into the MCExpr
+  // directly.
+  if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel())
+    handleIndirectSymViaGOTPCRel(AP, Offset, &ME);
+
+  AP.OutStreamer.EmitValue(ME, Size);
 }
 
 /// EmitGlobalConstant - Print a general LLVM constant to the .s file.
Index: lib/Target/X86/X86TargetObjectFile.h =================================================================== --- lib/Target/X86/X86TargetObjectFile.h +++ lib/Target/X86/X86TargetObjectFile.h @@ -19,6 +19,8 @@ /// x86-64. class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: + X86_64MachoTargetObjectFile(); + const MCExpr * getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, @@ -30,6 +32,10 @@ MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const override; + + const MCExpr * + getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCExpr *DotExpr) const override; }; /// X86LinuxTargetObjectFile - This implementation is used for linux x86 Index: lib/Target/X86/X86TargetObjectFile.cpp =================================================================== --- lib/Target/X86/X86TargetObjectFile.cpp +++ lib/Target/X86/X86TargetObjectFile.cpp @@ -21,6 +21,11 @@ using namespace llvm; using namespace dwarf; +X86_64MachoTargetObjectFile::X86_64MachoTargetObjectFile() + : TargetLoweringObjectFileMachO() { + SupportIndirectSymViaGOTPCRel = true; +} + const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, @@ -46,6 +51,16 @@ return TM.getSymbol(GV, Mang); } +const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel( + const MCSymbol *Sym, const MCExpr *DotExpr) const { + // On Darwin/X86-64, Sym@GOTPCREL+4 is an indirect pc-relative reference to + // Sym through its GOT entry; DotExpr is not needed to build it. 
+ const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); +} + void X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); Index: test/MC/X86/cstexpr-gotpcrel.ll =================================================================== --- /dev/null +++ test/MC/X86/cstexpr-gotpcrel.ll @@ -0,0 +1,36 @@ +; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s + +; Test that we can replace got equivalent globals by references to the +; GOT entry of the final symbol instead. +%struct.data = type { i32, %struct.anon } +%struct.anon = type { i32, i32 } + +; CHECK-NOT: _localgotequiv +@localfoo = global i32 42 +@localgotequiv = private unnamed_addr constant i32* @localfoo + +; CHECK-NOT: _extgotequiv +@extfoo = external global i32 +@extgotequiv = private unnamed_addr constant i32* @extfoo + +@table = global [3 x %struct.data] [ +; CHECK-LABEL: _table + %struct.data { i32 1, %struct.anon { i32 2, i32 3 } }, +; CHECK: .long 5 +; CHECK-NOT: .long _localgotequiv-(_table+20) +; CHECK-NEXT: .long _localfoo@GOTPCREL+4 + %struct.data { i32 4, %struct.anon { i32 5, + i32 trunc (i64 sub (i64 ptrtoint (i32** @localgotequiv to i64), + i64 ptrtoint (i32* getelementptr inbounds ([3 x %struct.data]* @table, i32 0, i64 1, i32 1, i32 1) to i64)) + to i32)} + }, +; CHECK: .long 5 +; CHECK-NOT: _extgotequiv-(_table+32) +; CHECK-NEXT: .long _extfoo@GOTPCREL+4 + %struct.data { i32 4, %struct.anon { i32 5, + i32 trunc (i64 sub (i64 ptrtoint (i32** @extgotequiv to i64), + i64 ptrtoint (i32* getelementptr inbounds ([3 x %struct.data]* @table, i32 0, i64 2, i32 1, i32 1) to i64)) + to i32)} + } +], align 16 +