Index: include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- include/llvm/CodeGen/AsmPrinter.h
+++ include/llvm/CodeGen/AsmPrinter.h
@@ -99,6 +99,11 @@
   /// default, this is equal to CurrentFnSym.
   MCSymbol *CurrentFnSymForSize;
 
+  /// Map the MCSymbol of each global proxy to its GlobalVariable and to the
+  /// number of uses of that proxy by other globals.
+  typedef std::pair<const GlobalVariable *, unsigned> GblProxyUsePair;
+  DenseMap<const MCSymbol *, GblProxyUsePair> GlobalProxies;
+
 private:
   // The garbage collection metadata printer table.
   void *GCMetadataPrinters; // Really a DenseMap.
@@ -245,6 +250,20 @@
   /// \brief Print a general LLVM constant to the .s file.
   void EmitGlobalConstant(const Constant *CV);
 
+  /// \brief An unnamed_addr global variable whose initializer is just a
+  /// pointer to another global variable acts as a global variable "proxy",
+  /// i.e., it is only used to hold the address of the latter. One (very) minor
+  /// optimization is to replace accesses to these proxies by using the GOT
+  /// entry for the final global instead. Hence, we select proxy candidates
+  /// among the module global variables, avoid emitting them unnecessarily and
+  /// finally replace references to them by PC relative accesses to GOT entries.
+  void GetGlobalProxies(Module &M);
+
+  /// \brief Constant expressions using global proxies may not be converted to
+  /// PC relative GOT entry references; in those cases we need to emit the
+  /// proxies we previously omitted in EmitGlobalVariable.
+  void EmitGlobalProxies();
+
   //===------------------------------------------------------------------===//
   // Overridable Hooks
   //===------------------------------------------------------------------===//
Index: include/llvm/Target/TargetLoweringObjectFile.h
===================================================================
--- include/llvm/Target/TargetLoweringObjectFile.h
+++ include/llvm/Target/TargetLoweringObjectFile.h
@@ -151,6 +151,17 @@
     return nullptr;
   }
 
+  /// \brief Whether the target supports replacing a data "PC"-relative access
+  /// to a symbol through another symbol by accessing the latter via the GOT.
+  virtual bool supportIndirectSymViaGOTPCRel() const { return false; }
+
+  /// \brief Get the target specific PC relative GOT entry relocation.
+  virtual const MCExpr *
+  getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+                            const MCExpr *DotExpr) const {
+    return nullptr;
+  }
+
 protected:
   virtual const MCSection *
   SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -337,6 +337,11 @@
   if (EmitSpecialLLVMGlobal(GV))
     return;
 
+  // Skip the emission of global proxies.
+  if (getObjFileLowering().supportIndirectSymViaGOTPCRel() &&
+      GlobalProxies.count(getSymbol(GV)))
+    return;
+
   if (isVerbose()) {
     GV->printAsOperand(OutStreamer.GetCommentOS(),
                        /*PrintType=*/false, GV->getParent());
@@ -879,11 +884,92 @@
   OutStreamer.AddBlankLine();
 }
 
+// isGlobalProxyCandidate - Only consider global proxies if all users are
+// constant expressions inside initializers of other global variables, e.g. we
+// don't handle cstexpr inside instructions. During global variable emission,
+// proxy candidates are skipped and may be emitted later on in case at least
+// one cstexpr isn't replaced by a PC relative GOT entry access.
+static bool isGlobalProxyCandidate(const GlobalVariable *GV) {
+  if (GV->use_empty())
+    return false;
+
+  for (auto *U : GV->users()) {
+    // Walk up single-use chains of constants; the walk must end at the global
+    // variable whose initializer contains this use of the proxy.
+    const Constant *C = dyn_cast<Constant>(U);
+    while (C && C->getNumUses() == 1 && !isa<GlobalVariable>(C))
+      C = dyn_cast<Constant>(C->user_back());
+
+    // C is null when the chain reaches a non-constant user (e.g. an
+    // instruction); isa<> must not be called on a null pointer.
+    if (!C || !isa<GlobalVariable>(C))
+      return false;
+  }
+
+  return true;
+}
+
+// GetGlobalProxies - An unnamed_addr global variable whose initializer is
+// just a pointer to another global variable acts as a global variable "proxy",
+// i.e., it is only used to hold the address of the latter. One (very) minor
+// optimization is to replace accesses to these proxies by using the GOT entry
+// for the final global instead. Hence, we select proxy candidates among the
+// module global variables, avoid emitting them unnecessarily and finally
+// replace references to them by PC relative accesses to GOT entries.
+void AsmPrinter::GetGlobalProxies(Module &M) {
+  // Proxies only matter on targets able to emit GOT PC relative accesses.
+  if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+    return;
+
+  for (const auto &G : M.globals()) {
+    // Global proxies are unnamed_addr globals with a constant pointer
+    // initializer to another global symbol.
+    if (!G.hasUnnamedAddr() || !G.hasInitializer() || !G.getNumOperands())
+      continue;
+
+    // The initializer must be (the address of) another global variable.
+    const GlobalVariable *FinalGV = dyn_cast<GlobalVariable>(G.getOperand(0));
+    if (!FinalGV || !FinalGV->getType()->isPointerTy())
+      continue;
+
+    if (!isGlobalProxyCandidate(&G))
+      continue;
+
+    // Seed the use counter that handleIndirectSymViaGOTPCRel decreases as
+    // uses of the proxy are rewritten into GOT accesses.
+    const MCSymbol *ProxySym = getSymbol(&G);
+    GlobalProxies[ProxySym] = std::make_pair(&G, G.getNumUses());
+  }
+}
+
+// EmitGlobalProxies - Constant expressions using global proxies may not be
+// converted to PC relative GOT entry references; in those cases we need to
+// emit the proxies we previously omitted in EmitGlobalVariable.
+void AsmPrinter::EmitGlobalProxies() {
+  if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+    return;
+
+  while (!GlobalProxies.empty()) {
+    // Erase before emitting, as EmitGlobalVariable skips registered proxies.
+    // NOTE(review): DenseMap order is not stable; emission order may vary.
+    auto I = GlobalProxies.begin();
+    const GlobalVariable *GV = I->second.first;
+    GlobalProxies.erase(I);
+    EmitGlobalVariable(GV);
+  }
+}
+
 bool AsmPrinter::doFinalization(Module &M) {
+  // Gather all global proxies in the module.
+  GetGlobalProxies(M);
+
   // Emit global variables.
   for (const auto &G : M.globals())
     EmitGlobalVariable(&G);
 
+  // Emit remaining global proxies.
+  EmitGlobalProxies();
+
   // Emit visibility info for declarations
   for (const Function &F : M) {
     if (!F.isDeclaration())
@@ -1671,7 +1757,8 @@
   }
 }
 
-static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP);
+static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP,
+                                   uint64_t Offset = 0);
 
 /// isRepeatedByteSequence - Determine whether the given value is
 /// composed of a repeated sequence of identical bytes and return the
@@ -1800,20 +1887,22 @@
 }
 
 
-static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
+static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP,
+                                    uint64_t Offset) {
   // See if we can aggregate some values. Make sure it can be
   // represented as a series of bytes of the constant value.
   int Value = isRepeatedByteSequence(CA, AP.TM);
+  const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
 
   if (Value != -1) {
-    uint64_t Bytes =
-        AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
-            CA->getType());
+    uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
     AP.OutStreamer.EmitFill(Bytes, Value);
   }
   else {
-    for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
-      emitGlobalConstantImpl(CA->getOperand(i), AP);
+    for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+      emitGlobalConstantImpl(CA->getOperand(i), AP, Offset);
+      Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
+    }
   }
 }
 
@@ -1829,7 +1918,8 @@
     AP.OutStreamer.EmitZeros(Padding);
 }
 
-static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
+static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP,
+                                     uint64_t Offset) {
   // Print the fields in successive locations. Pad to align if needed!
   const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
   unsigned Size = DL->getTypeAllocSize(CS->getType());
@@ -1838,15 +1928,15 @@
   for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
     const Constant *Field = CS->getOperand(i);
 
+    // Print the actual field value.
+    emitGlobalConstantImpl(Field, AP, Offset+SizeSoFar);
+
     // Check if padding is needed and insert one or more 0s.
     uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
     uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
                         - Layout->getElementOffset(i)) - FieldSize;
     SizeSoFar += FieldSize + PadSize;
 
-    // Now print the actual field value.
-    emitGlobalConstantImpl(Field, AP);
-
     // Insert padding - this may include padding to increase the size of the
     // current field up to the ABI size (if the struct is not packed) as well
     // as padding to ensure that the next field starts at the right offset.
@@ -1962,7 +2052,72 @@
   }
 }
 
-static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
+static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, uint64_t Offset,
+                                         const MCExpr **ME) {
+  // Rewrite *ME in place into a GOT PC relative access if it has the form
+  //
+  //   cstexpr := <proxy> - ".",  where "." := <X> | <X> + <offset>
+  //
+  // and X is the symbol base of the current emission; else leave it untouched.
+  const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(*ME);
+  if (!BE || BE->getOpcode() != MCBinaryExpr::Sub)
+    return;
+
+  const MCSymbolRefExpr *Proxy = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+  if (!Proxy)
+    return;
+  const MCSymbol *ProxySym = &Proxy->getSymbol();
+  if (!AP.GlobalProxies.count(ProxySym))
+    return;
+
+  // Check the "." expression, that is, make sure X + offset == ".". If the
+  // current emission offset differs from 'offset', this isn't a "PC" relative
+  // delta. NOTE(review): X itself is not compared with the emitted global.
+  if (isa<MCSymbolRefExpr>(BE->getRHS())) {
+    if (Offset != 0)
+      return;
+  } else if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(BE->getRHS())) {
+    if ((BExpr->getOpcode() != MCBinaryExpr::Add) ||
+        !isa<MCSymbolRefExpr>(BExpr->getLHS()))
+      return;
+
+    const MCConstantExpr *XOff = dyn_cast<MCConstantExpr>(BExpr->getRHS());
+    if (!XOff || (uint64_t)XOff->getValue() != Offset)
+      return;
+  } else {
+    return;
+  }
+
+  // Emit the GOT PC relative access to replace the proxy usage, i.e., replace:
+  //
+  //   A:
+  //      .long 42
+  //   Proxy:
+  //      .quad A
+  //   X:
+  //      .long Proxy - "."
+  //
+  // with a target specific equivalent to:
+  //
+  //   X:
+  //      .long A@GOTPCREL
+  AsmPrinter::GblProxyUsePair Result = AP.GlobalProxies[ProxySym];
+  const GlobalVariable *GV = Result.first;
+  const GlobalVariable *FinalGV = cast<GlobalVariable>(GV->getOperand(0));
+  const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
+  *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym,
+                                                          BE->getRHS());
+
+  // One use of the proxy was rewritten; drop the proxy once none is left.
+  unsigned NumUses = Result.second - 1;
+  if (NumUses)
+    AP.GlobalProxies[ProxySym] = std::make_pair(GV, NumUses);
+  else
+    AP.GlobalProxies.erase(ProxySym);
+}
+
+static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
+                                   uint64_t Offset) {
   const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
   uint64_t Size = DL->getTypeAllocSize(CV->getType());
   if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
@@ -1997,10 +2152,10 @@
     return emitGlobalConstantDataSequential(CDS, AP);
 
   if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
-    return emitGlobalConstantArray(CVA, AP);
+    return emitGlobalConstantArray(CVA, AP, Offset);
 
   if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
-    return emitGlobalConstantStruct(CVS, AP);
+    return emitGlobalConstantStruct(CVS, AP, Offset);
 
   if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
     // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
@@ -2023,7 +2178,11 @@
 
   // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
   // thread the streamer with EmitValue.
-  AP.OutStreamer.EmitValue(AP.lowerConstant(CV), Size);
+  const MCExpr *ME = AP.lowerConstant(CV);
+  if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel())
+    handleIndirectSymViaGOTPCRel(AP, Offset, &ME);
+
+  AP.OutStreamer.EmitValue(ME, Size);
 }
 
 /// EmitGlobalConstant - Print a general LLVM constant to the .s file.
Index: lib/Target/X86/X86TargetObjectFile.h =================================================================== --- lib/Target/X86/X86TargetObjectFile.h +++ lib/Target/X86/X86TargetObjectFile.h @@ -30,6 +30,12 @@ MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const override; + + bool supportIndirectSymViaGOTPCRel() const override { return true; } + + const MCExpr * + getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCExpr *DotExpr) const override; }; /// X86LinuxTargetObjectFile - This implementation is used for linux x86 Index: lib/Target/X86/X86TargetObjectFile.cpp =================================================================== --- lib/Target/X86/X86TargetObjectFile.cpp +++ lib/Target/X86/X86TargetObjectFile.cpp @@ -46,6 +46,16 @@ return TM.getSymbol(GV, Mang); } +const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel( + const MCSymbol *Sym, const MCExpr *DotExpr) const { + // On Darwin/X86-64, we can use foo@GOTPCREL+4, which is an indirect + // pc-relative reference; DotExpr is not needed to build it. 
+ const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); +} + void X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); Index: test/MC/X86/cstexpr-gotpcrel.ll =================================================================== --- /dev/null +++ test/MC/X86/cstexpr-gotpcrel.ll @@ -0,0 +1,47 @@ +; RUN: llc -filetype=obj -mtriple=x86_64-apple-darwin %s -o - | llvm-readobj -r | FileCheck -check-prefix=X86-OBJ %s +; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck -check-prefix=X86 %s + +; X86-OBJ: Format: Mach-O 64-bit x86-64 +; X86-OBJ: Arch: x86_64 +; X86-OBJ: AddressSize: 64bit +; X86-OBJ: Relocations [ +; X86-OBJ: Section __data { +%struct.data = type { i32, %struct.anon } +%struct.anon = type { i32, i32 } + +; Offsets in the table can refer to local or external symbols +; and must be proxied by other globals - in the final object, the +; indirection is replaced by a GOT entry access and the unnamed_addr proxy +; isn't emitted. 
+ +; X86-NOT: _localproxy +@localfoo = global i32 42 +@localproxy = unnamed_addr constant i32* @localfoo + +; X86-NOT: _extproxy +@extfoo = external global i32 +@extproxy = unnamed_addr constant i32* @extfoo + +@table = global [3 x %struct.data] [ +; X86-LABEL: _table + %struct.data { i32 1, %struct.anon { i32 2, i32 3 } }, +; X86: .long 5 +; X86-NOT: .long _localproxy-(_table+20) +; X86-NEXT: .long _localfoo@GOTPCREL+4 + %struct.data { i32 4, %struct.anon { i32 5, + i32 trunc (i64 sub (i64 ptrtoint (i32** @localproxy to i64), + i64 ptrtoint (i32* getelementptr inbounds ([3 x %struct.data]* @table, i32 0, i64 1, i32 1, i32 1) to i64)) + to i32)} + }, +; X86: .long 5 +; X86-NOT: _extproxy-(_table+32) +; X86-NEXT: .long _extfoo@GOTPCREL+4 + %struct.data { i32 4, %struct.anon { i32 5, + i32 trunc (i64 sub (i64 ptrtoint (i32** @extproxy to i64), + i64 ptrtoint (i32* getelementptr inbounds ([3 x %struct.data]* @table, i32 0, i64 2, i32 1, i32 1) to i64)) + to i32)} + } +], align 16 + +; X86-OBJ: 0x30 1 2 1 X86_64_RELOC_GOT 0 _extfoo +; X86-OBJ-NEXT: 0x24 1 2 1 X86_64_RELOC_GOT 0 _localfoo