Index: bindings/ocaml/llvm/llvm.ml =================================================================== --- bindings/ocaml/llvm/llvm.ml +++ bindings/ocaml/llvm/llvm.ml @@ -228,6 +228,7 @@ | LocalDynamic | InitialExec | LocalExec + | Emulated end module AtomicOrdering = struct Index: bindings/ocaml/llvm/llvm.mli =================================================================== --- bindings/ocaml/llvm/llvm.mli +++ bindings/ocaml/llvm/llvm.mli @@ -291,6 +291,7 @@ | LocalDynamic | InitialExec | LocalExec + | Emulated end (** The ordering of an atomic [load], [store], [cmpxchg], [atomicrmw] or Index: docs/BitCodeFormat.rst =================================================================== --- docs/BitCodeFormat.rst +++ docs/BitCodeFormat.rst @@ -724,6 +724,7 @@ * ``localdynamic``: code 2 * ``initialexec``: code 3 * ``localexec``: code 4 + * ``emulated``: code 5 * *unnamed_addr*: If present and non-zero, indicates that the variable has ``unnamed_addr`` Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -482,10 +482,15 @@ For variables in modules that will not be loaded dynamically. ``localexec`` For variables defined in the executable and only used within it. +``emulated`` + For variables on platforms that do not support the ELF TLS models. If no explicit model is given, the "general dynamic" model is used. +When a target does not implement the "general dynamic" model, it may +use the "emulated" model as the default. -The models correspond to the ELF TLS models; see `ELF Handling For +All models except ``emulated`` +correspond to the ELF TLS models; see `ELF Handling For Thread-Local Storage <http://people.redhat.com/drepper/tls.pdf>`_ for more information on under which circumstances the different models may be used. The target may choose a different TLS model if the specified Index: include/llvm-c/Core.h =================================================================== --- include/llvm-c/Core.h +++ include/llvm-c/Core.h @@ -358,7 +358,8 @@ LLVMGeneralDynamicTLSModel, LLVMLocalDynamicTLSModel, LLVMInitialExecTLSModel, - LLVMLocalExecTLSModel + LLVMLocalExecTLSModel, + LLVMEmulatedTLSModel } LLVMThreadLocalMode; typedef enum { Index: include/llvm/IR/GlobalValue.h =================================================================== --- include/llvm/IR/GlobalValue.h +++ include/llvm/IR/GlobalValue.h @@ -114,7 +114,8 @@ GeneralDynamicTLSModel, LocalDynamicTLSModel, InitialExecTLSModel, - LocalExecTLSModel + LocalExecTLSModel, + EmulatedTLSModel }; ~GlobalValue() override { @@ -146,6 +147,8 @@ /// If the value is "Thread Local", its value isn't shared by the threads. bool isThreadLocal() const { return getThreadLocalMode() != NotThreadLocal; } + /// Set thread local model to GeneralDynamicTLSModel if the argument is true. + /// Use setThreadLocalMode for other thread local models. void setThreadLocal(bool Val) { setThreadLocalMode(Val ?
GeneralDynamicTLSModel : NotThreadLocal); } Index: include/llvm/MC/MCObjectFileInfo.h =================================================================== --- include/llvm/MC/MCObjectFileInfo.h +++ include/llvm/MC/MCObjectFileInfo.h @@ -216,6 +216,7 @@ MCSection *getTextSection() const { return TextSection; } MCSection *getDataSection() const { return DataSection; } MCSection *getBSSSection() const { return BSSSection; } + MCSection *getReadOnlySection() const { return ReadOnlySection; } MCSection *getLSDASection() const { return LSDASection; } MCSection *getCompactUnwindSection() const { return CompactUnwindSection; } MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } Index: include/llvm/Support/CodeGen.h =================================================================== --- include/llvm/Support/CodeGen.h +++ include/llvm/Support/CodeGen.h @@ -40,7 +40,8 @@ GeneralDynamic, LocalDynamic, InitialExec, - LocalExec + LocalExec, + Emulated }; } Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -2792,6 +2792,10 @@ virtual bool useLoadStackGuardNode() const { return false; } + + /// Lower TLS global address SDNode for target independent emulated TLS model. + virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; }; /// Given an LLVM IR type and return type attributes, compute the return value Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -520,6 +520,7 @@ KEYWORD(localdynamic); KEYWORD(initialexec); KEYWORD(localexec); + KEYWORD(emulated); KEYWORD(zeroinitializer); KEYWORD(undef); KEYWORD(null); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -1152,10 +1152,11 @@ /// := 'localdynamic' /// := 'initialexec' /// := 'localexec' +/// := 'emulated' bool LLParser::ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM) { switch (Lex.getKind()) { default: - return TokError("expected localdynamic, initialexec or localexec"); + return TokError("expected localdynamic, initialexec, localexec, or emulated"); case lltok::kw_localdynamic: TLM = GlobalVariable::LocalDynamicTLSModel; break; @@ -1165,6 +1166,9 @@ case lltok::kw_localexec: TLM = GlobalVariable::LocalExecTLSModel; break; + case lltok::kw_emulated: + TLM = GlobalVariable::EmulatedTLSModel; + break; } Lex.Lex(); Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ lib/AsmParser/LLToken.h @@ -47,7 +47,7 @@ kw_externally_initialized, kw_extern_weak, kw_external, kw_thread_local, - kw_localdynamic, kw_initialexec, kw_localexec, + kw_localdynamic, kw_initialexec, kw_localexec, kw_emulated, kw_zeroinitializer, kw_undef, kw_null, kw_to, Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -585,6 +585,7 @@ case 2: return GlobalVariable::LocalDynamicTLSModel; case 3: return GlobalVariable::InitialExecTLSModel; case 4: return GlobalVariable::LocalExecTLSModel; + case 5: return GlobalVariable::EmulatedTLSModel; } } Index: lib/Bitcode/Writer/BitcodeWriter.cpp 
=================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -536,6 +536,7 @@ case GlobalVariable::LocalDynamicTLSModel: return 2; case GlobalVariable::InitialExecTLSModel: return 3; case GlobalVariable::LocalExecTLSModel: return 4; + case GlobalVariable::EmulatedTLSModel: return 5; } llvm_unreachable("Invalid TLS model"); } Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -341,8 +341,23 @@ return TM.getSymbol(GV, *Mang); } +static MCSymbol *createEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName()); +} + +static MCSymbol *createEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName()); +} + /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + bool IsEmuTLSVar = + GV->getThreadLocalMode() == llvm::GlobalVariable::EmulatedTLSModel; + assert((!IsEmuTLSVar || getObjFileLowering().getDataRelLocalSection()) && + "Need relocatable local section for emulated TLS variables"); + assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && + "No emulated TLS variables in the common section"); + if (GV->hasInitializer()) { // Check to see if this is a special global used by LLVM, if so, emit it. if (EmitSpecialLLVMGlobal(GV)) @@ -353,7 +368,9 @@ if (GlobalGOTEquivs.count(getSymbol(GV))) return; - if (isVerbose()) { + if (isVerbose() && !IsEmuTLSVar) { + // When printing the control variable __emutls_v.*, + // we don't need to print the original TLS variable name. GV->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, GV->getParent()); OutStreamer->GetCommentOS() << '\n'; @@ -361,8 +378,15 @@ } MCSymbol *GVSym = getSymbol(GV); - EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); + MCSymbol *EmittedSym = IsEmuTLSVar ? + createEmuTLSControlSym(GVSym, OutContext) : GVSym; + // createEmuTLSControlSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedSym. + EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); + + // A declaration has no initializer and no control variable in the current + // module for the emulated TLS model. if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -372,7 +396,7 @@ "' is already defined"); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); @@ -384,6 +408,44 @@ // sections and expected to be contiguous (e.g. ObjC metadata). unsigned AlignLog = getGVAlignmentLog2(GV, *DL); + bool AllZeroInitValue = false; + if (GV->hasInitializer()) { + const Constant *InitValue = GV->getInitializer(); + if (isa<ConstantAggregateZero>(InitValue)) + AllZeroInitValue = true; + else { + const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue); + if (InitIntValue && InitIntValue->isZero()) + AllZeroInitValue = true; + } + } + if (IsEmuTLSVar) { + // If there is a non-zero initial value, use the .data.rel.local section; + // otherwise use the .data section. + MCSection *TLSVarSection = const_cast<MCSection *>( + (GV->hasInitializer() && !AllZeroInitValue) + ?
getObjFileLowering().getDataRelLocalSection() + : getObjFileLowering().getDataSection()); + OutStreamer->SwitchSection(TLSVarSection); + EmitLinkage(GV, EmittedSym); // same linkage as GV + unsigned WordSize = DL->getPointerSize(); + unsigned Alignment = DL->getPointerABIAlignment(); + EmitAlignment(Log2_32(Alignment)); + OutStreamer->EmitLabel(EmittedSym); + OutStreamer->EmitIntValue(Size, WordSize); + OutStreamer->EmitIntValue((1 << AlignLog), WordSize); + OutStreamer->EmitIntValue(0, WordSize); + if (GV->hasInitializer() && !AllZeroInitValue) { + OutStreamer->EmitSymbolValue( + createEmuTLSInitSym(GVSym, OutContext), WordSize); + } else + OutStreamer->EmitIntValue(0, WordSize); + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedSym), + MCConstantExpr::create(4 * WordSize, OutContext)); + OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable. + } + for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); HI.Handler->setSymbolSize(GVSym, Size); @@ -391,6 +453,8 @@ // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { + assert(!(IsEmuTLSVar && GVKind.isCommon()) && + "No emulated TLS variables in the common section"); if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. unsigned Align = 1 << AlignLog; @@ -435,12 +499,21 @@ return; } - MCSection *TheSection = + if (IsEmuTLSVar && AllZeroInitValue) + return; // No need of initialization values. + + MCSymbol *EmittedInitSym = IsEmuTLSVar ? + createEmuTLSInitSym(GVSym, OutContext) : GVSym; + // createEmuTLSInitSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedInitSym. + + MCSection *TheSection = IsEmuTLSVar ? + getObjFileLowering().getReadOnlySection() : getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // Handle the zerofill directive on darwin, which is a special form of BSS // emission. - if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { + if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. // .globl _foo @@ -460,7 +533,7 @@ // TLOF class. This will also make it more obvious that stuff like // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho // specific code. - if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) { // Emit the .tbss symbol MCSymbol *MangSym = OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); @@ -503,16 +576,18 @@ OutStreamer->SwitchSection(TheSection); - EmitLinkage(GV, GVSym); + // emutls_t.* symbols are only used in the current compilation unit.
+ if (!IsEmuTLSVar) + EmitLinkage(GV, EmittedInitSym); EmitAlignment(AlignLog, GV); - OutStreamer->EmitLabel(GVSym); + OutStreamer->EmitLabel(EmittedInitSym); EmitGlobalConstant(GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym), + OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym), MCConstantExpr::create(Size, OutContext)); OutStreamer->AddBlankLine(); Index: lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -148,28 +148,33 @@ DIELoc *Loc = new (DIEValueAllocator) DIELoc; const MCSymbol *Sym = Asm->getSymbol(Global); if (Global->isThreadLocal()) { - // FIXME: Make this work with -gsplit-dwarf. - unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); - // Based on GCC's support for TLS: - if (!DD->useSplitDwarf()) { - // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, - PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); - // 2) containing the (relocated) offset of the TLS variable - // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + if (Global->getThreadLocalMode() == + llvm::GlobalValue::ThreadLocalMode::EmulatedTLSModel) { + // TODO: add debug info for emulated thread local mode. } else { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(*Loc, dwarf::DW_FORM_udata, - DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } - // 3) followed by an OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, - DD->useGNUTLSOpcode() ?
dwarf::DW_OP_GNU_push_tls_address - : dwarf::DW_OP_form_tls_address); } else { DD->addArangeLabel(SymbolCU(this, Sym)); addOpAddress(*Loc, Sym); Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2995,3 +2995,46 @@ DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); return true; } + +//===----------------------------------------------------------------------===// +// Implementation of Emulated TLS Model +//===----------------------------------------------------------------------===// + +SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + // Access to the address of TLS variable xyz is lowered to a function call: + // __emutls_get_address( address of global variable named "__emutls_v.xyz" ) + EVT PtrVT = getPointerTy(); + PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext()); + SDLoc dl(GA); + + ArgListTy Args; + ArgListEntry Entry; + std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str(); + Module *VariableModule = const_cast<Module *>(GA->getGlobal()->getParent()); + StringRef EmuTlsVarName(NameString); + GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName); + if (!EmuTlsVar) + EmuTlsVar = dyn_cast_or_null<GlobalVariable>( + VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType)); + Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); + Entry.Ty = VoidPtrType; + Args.push_back(Entry); + + SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()); + CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + + // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls. + // At least for X86 targets; maybe good for other targets too? + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setAdjustsStack(true); // Is this only for X86 target?
+ MFI->setHasCalls(true); + + assert((GA->getOffset() == 0) && + "Emulated TLS must have zero offset in GlobalAddressSDNode"); + return CallResult.first; +} Index: lib/IR/AsmWriter.cpp =================================================================== --- lib/IR/AsmWriter.cpp +++ lib/IR/AsmWriter.cpp @@ -2296,6 +2296,9 @@ case GlobalVariable::LocalExecTLSModel: Out << "thread_local(localexec) "; break; + case GlobalVariable::EmulatedTLSModel: + Out << "thread_local(emulated) "; + break; } } Index: lib/IR/Core.cpp =================================================================== --- lib/IR/Core.cpp +++ lib/IR/Core.cpp @@ -1599,6 +1599,8 @@ return LLVMInitialExecTLSModel; case GlobalVariable::LocalExecTLSModel: return LLVMLocalExecTLSModel; + case GlobalVariable::EmulatedTLSModel: + return LLVMEmulatedTLSModel; } llvm_unreachable("Invalid GlobalVariable thread local mode"); @@ -1623,6 +1625,9 @@ case LLVMLocalExecTLSModel: GV->setThreadLocalMode(GlobalVariable::LocalExecTLSModel); break; + case LLVMEmulatedTLSModel: + GV->setThreadLocalMode(GlobalVariable::EmulatedTLSModel); + break; } } Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3154,6 +3154,10 @@ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + + if (Model == TLSModel::Emulated) + return LowerToTLSEmulatedModel(GA, DAG); + if (!EnableAArch64ELFLocalDynamicTLSGeneration) { if (Model == TLSModel::LocalDynamic) Model = TLSModel::GeneralDynamic; Index: lib/Target/AArch64/AArch64MCInstLower.cpp =================================================================== --- lib/Target/AArch64/AArch64MCInstLower.cpp +++ lib/Target/AArch64/AArch64MCInstLower.cpp @@ -112,6 +112,9 @@ case TLSModel::GeneralDynamic: RefFlags |= AArch64MCExpr::VK_TLSDESC; break; + case TLSModel::Emulated: + assert(false && "Unexpected TLSModel::Emulated"); + break; } } else { // No modifier means this is a generic reference, classified as absolute for Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -2568,6 +2568,8 @@ case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModels(GA, DAG, model); + case TLSModel::Emulated: + return LowerToTLSEmulatedModel(GA, DAG); } llvm_unreachable("bogus TLS model"); } Index: lib/Target/CppBackend/CPPBackend.cpp =================================================================== --- lib/Target/CppBackend/CPPBackend.cpp +++ lib/Target/CppBackend/CPPBackend.cpp @@ -353,6 +353,9 @@ case GlobalVariable::LocalExecTLSModel: Out << "GlobalVariable::LocalExecTLSModel"; break; + case GlobalVariable::EmulatedTLSModel: + Out << "GlobalVariable::EmulatedTLSModel"; + break; } } Index: lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsISelLowering.cpp +++ lib/Target/Mips/MipsISelLowering.cpp @@ -1727,6 +1727,10 @@ TLSModel::Model model = getTargetMachine().getTLSModel(GV); + if (model == TLSModel::Emulated) { + return LowerToTLSEmulatedModel(GA, DAG); + } + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { // General Dynamic and Local Dynamic TLS Model. unsigned Flag = (model == TLSModel::LocalDynamic) ?
MipsII::MO_TLSLDM Index: lib/Target/PowerPC/PPCCTRLoops.cpp =================================================================== --- lib/Target/PowerPC/PPCCTRLoops.cpp +++ lib/Target/PowerPC/PPCCTRLoops.cpp @@ -206,7 +206,8 @@ if (!TM) return true; TLSModel::Model Model = TM->getTLSModel(GV); - return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; + return Model == TLSModel::GeneralDynamic || + Model == TLSModel::LocalDynamic || Model == TLSModel::Emulated; } bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -1998,6 +1998,10 @@ TLSModel::Model Model = getTargetMachine().getTLSModel(GV); + if (Model == TLSModel::Emulated) { + return LowerToTLSEmulatedModel(GA, DAG); + } + if (Model == TLSModel::LocalExec) { SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_HA); Index: lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- lib/Target/Sparc/SparcISelLowering.cpp +++ lib/Target/Sparc/SparcISelLowering.cpp @@ -1875,6 +1875,10 @@ TLSModel::Model model = getTargetMachine().getTLSModel(GV); + if (model == TLSModel::Emulated) { + return LowerToTLSEmulatedModel(GA, DAG); + } + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { unsigned HiTF = ((model == TLSModel::GeneralDynamic) ? SparcMCExpr::VK_Sparc_TLS_GD_HI22 Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2576,6 +2576,10 @@ false, false, false, 0); break; } + + case TLSModel::Emulated: { + return LowerToTLSEmulatedModel(Node, DAG); + } } // Add the base and offset together. Index: lib/Target/TargetMachine.cpp =================================================================== --- lib/Target/TargetMachine.cpp +++ lib/Target/TargetMachine.cpp @@ -102,6 +102,8 @@ return TLSModel::InitialExec; case GlobalVariable::LocalExecTLSModel: return TLSModel::LocalExec; + case GlobalVariable::EmulatedTLSModel: + return TLSModel::Emulated; } llvm_unreachable("invalid TLS model"); } @@ -130,7 +132,9 @@ // If the user specified a more specific model, use that. TLSModel::Model SelectedModel = getSelectedTLSModel(GV); - if (SelectedModel > Model) + // A selected TLSModel::Emulated is always honored, since + // the target does not support the other models.
+ if (SelectedModel > Model || SelectedModel == TLSModel::Emulated) return SelectedModel; return Model; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -11471,6 +11471,8 @@ return LowerToTLSExecModel( GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), DAG.getTarget().getRelocationModel() == Reloc::PIC_); + case TLSModel::Emulated: + return LowerToTLSEmulatedModel(GA, DAG); } llvm_unreachable("Unknown TLS model."); } Index: test/Assembler/tls-models.ll =================================================================== --- test/Assembler/tls-models.ll +++ test/Assembler/tls-models.ll @@ -5,8 +5,10 @@ ; CHECK: @b = thread_local(localdynamic) global i32 0 ; CHECK: @c = thread_local(initialexec) global i32 0 ; CHECK: @d = thread_local(localexec) global i32 0 +; CHECK: @e = thread_local(emulated) global i32 0 @a = thread_local global i32 0 @b = thread_local(localdynamic) global i32 0 @c = thread_local(initialexec) global i32 0 @d = thread_local(localexec) global i32 0 +@e = thread_local(emulated) global i32 0 Index: test/CodeGen/AArch64/arm64-tls-dynamic-together.ll =================================================================== --- test/CodeGen/AArch64/arm64-tls-dynamic-together.ll +++ test/CodeGen/AArch64/arm64-tls-dynamic-together.ll @@ -15,4 +15,47 @@ ; CHECK: .tlsdesccall general_dynamic_var ; CHECK-NEXT: blr {{x[0-9]+}} +; CHECK-NOT: __emutls_v.general_dynamic_var: +} + +@emulated_var = external thread_local(emulated) global i32 + +define i32 @test_emulated() { +; CHECK-LABEL: test_emulated: + + %val = load i32, i32* @emulated_var + ret i32 %val + +; CHECK: adrp{{.+}}__emutls_v.emulated_var +; CHECK: bl __emutls_get_address + +; CHECK-NOT: __emutls_v.general_dynamic_var: +; CHECK-NOT: __emutls_v.emulated_var +; CHECK-NOT: __emutls_t.emulated_var +} + +@emulated_init_var = thread_local(emulated) global i32 37, align 8 + +define i32 @test_emulated_init() { +; CHECK-LABEL: test_emulated_init: + + %val = load i32, i32* @emulated_init_var + ret i32 %val + +; CHECK: adrp{{.+}}__emutls_v.emulated_init_var +; CHECK: bl __emutls_get_address + +; CHECK-NOT: __emutls_v.general_dynamic_var: + +; CHECK: .align 3 +; CHECK-LABEL: __emutls_v.emulated_init_var: +; CHECK-NEXT: .xword 4 +; CHECK-NEXT: .xword 8 +; CHECK-NEXT: .xword 0 +; CHECK-NEXT: .xword __emutls_t.emulated_init_var + +; CHECK-LABEL: __emutls_t.emulated_init_var: +; CHECK-NEXT: .word 37 + +; CHECK-NOT: __emutls_v.general_dynamic_var: } Index: test/CodeGen/AArch64/emutls.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/emutls.ll @@ -0,0 +1,366 @@ +; RUN: llc -mtriple=arm-linux-android -relocation-model=pic < %s | FileCheck -check-prefix=ARM32 %s +; RUN: llc -mtriple=aarch64-linux-android -relocation-model=pic < %s | FileCheck -check-prefix=ARM64 %s + +; Copied from X86/emutls.ll + +; Use my_emutls_get_address like __emutls_get_address. 
+@my_emutls_v_xyz = external global i8*, align 4 +declare i8* @my_emutls_get_address(i8*) + +define i32 @my_get_xyz() { +; ARM32-LABEL: my_get_xyz: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl my_emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] +; ARM64-LABEL: my_get_xyz: +; ARM64: adrp x0, :got:my_emutls_v_xyz +; ARM64-NEXT: ldr x0, [x0, :got_lo12:my_emutls_v_xyz] +; ARM64-NEXT: bl my_emutls_get_address +; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*)) + %0 = bitcast i8* %call to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +@i1 = thread_local(emulated) global i32 15 +@i2 = external thread_local(emulated) global i32 +@i3 = internal thread_local(emulated) global i32 15 +@i4 = hidden thread_local(emulated) global i32 15 +@i5 = external hidden thread_local(emulated) global i32 +@s1 = thread_local(emulated) global i16 15 +@b1 = thread_local(emulated) global i8 0 + +define i32 @f1() { +; ARM32-LABEL: f1: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] +; ARM64-LABEL: f1: +; ARM64: adrp x0, :got:__emutls_v.i1 +; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] +; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + %tmp1 = load i32, i32* @i1 + ret i32 %tmp1 +} + +define i32* @f2() { +; ARM32-LABEL: f2: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop +; ARM64-LABEL: f2: +; ARM64: adrp x0, :got:__emutls_v.i1 +; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] +; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + ret i32* @i1 +} + +define i32 @f3() nounwind { +; ARM32-LABEL: f3: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] + +entry: + %tmp1 = load i32, i32* @i2 + ret i32 %tmp1 +} + +define i32* @f4() { +; ARM32-LABEL: f4: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop + +entry: + ret i32* @i2 +} + +define i32 @f5() nounwind { +; ARM32-LABEL: f5: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] + +entry: + %tmp1 = load i32, i32* @i3 + ret i32 %tmp1 +} + +define i32* @f6() { +; ARM32-LABEL: f6: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop + +entry: + ret i32* @i3 +} + +define i32 @f7() { +; ARM32-LABEL: f7: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] + +entry: + %tmp1 = load i32, i32* @i4 + ret i32 %tmp1 +} + +define i32* @f8() { +; ARM32-LABEL: f8: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop + +entry: + ret i32* @i4 +} + +define i32 @f9() { +; ARM32-LABEL: f9: +; ARM32: ldr r0, +; 
ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] + +entry: + %tmp1 = load i32, i32* @i5 + ret i32 %tmp1 +} + +define i32* @f10() { +; ARM32-LABEL: f10: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop + +entry: + ret i32* @i5 +} + +define i16 @f11() { +; ARM32-LABEL: f11: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldrh r0, [r0] + +entry: + %tmp1 = load i16, i16* @s1 + ret i16 %tmp1 +} + +define i32 @f12() { +; ARM32-LABEL: f12: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldrsh r0, [r0] + +entry: + %tmp1 = load i16, i16* @s1 + %tmp2 = sext i16 %tmp1 to i32 + ret i32 %tmp2 +} + +define i8 @f13() { +; ARM32-LABEL: f13: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldrb r0, [r0] +; ARM32-NEXT: pop + +entry: + %tmp1 = load i8, i8* @b1 + ret i8 %tmp1 +} + +define i32 @f14() { +; ARM32-LABEL: f14: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldrsb r0, [r0] +; ARM32-NEXT: pop + +entry: + %tmp1 = load i8, i8* @b1 + %tmp2 = sext i8 %tmp1 to i32 + ret i32 %tmp2 +} + +;;;;;;;;;;;;;; 32-bit __emutls_v. and __emutls_t. + +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.i1: +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long __emutls_t.i1 + +; ARM32 .section .rodata, +; ARM32-LABEL: __emutls_t.i1: +; ARM32-NEXT: .long 15 + +; ARM32-NOT: __emutls_v.i2 + +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.i3: +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long __emutls_t.i3 + +; ARM32 .section .rodata, +; ARM32-LABEL: __emutls_t.i3: +; ARM32-NEXT: .long 15 + +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.i4: +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long __emutls_t.i4 + +; ARM32 .section .rodata, +; ARM32-LABEL: __emutls_t.i4: +; ARM32-NEXT: .long 15 + +; ARM32-NOT: __emutls_v.i5: +; ARM32 .hidden __emutls_v.i5 +; ARM32-NOT: __emutls_v.i5: + +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.s1: +; ARM32-NEXT: .long 2 +; ARM32-NEXT: .long 2 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long __emutls_t.s1 + +; ARM32 .section .rodata, +; ARM32-LABEL: __emutls_t.s1: +; ARM32-NEXT: .short 15 + +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.b1: +; ARM32-NEXT: .long 1 +; ARM32-NEXT: .long 1 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long 0 + +; ARM32-NOT: __emutls_t.b1 + +;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t. 
+ +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i1: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i1 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i1: +; ARM64-NEXT: .word 15 + +; ARM64-NOT: __emutls_v.i2 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i3: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i3 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i3: +; ARM64-NEXT: .word 15 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i4: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i4 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i4: +; ARM64-NEXT: .word 15 + +; ARM64-NOT: __emutls_v.i5: +; ARM64 .hidden __emutls_v.i5 +; ARM64-NOT: __emutls_v.i5: + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.s1: +; ARM64-NEXT: .xword 2 +; ARM64-NEXT: .xword 2 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.s1 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.s1: +; ARM64-NEXT: .hword 15 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.b1: +; ARM64-NEXT: .xword 1 +; ARM64-NEXT: .xword 1 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword 0 + +; ARM64-NOT: __emutls_t.b1 Index: test/CodeGen/ARM/emutls1.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/emutls1.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-androideabi | FileCheck %s +; RUN: llc < %s -march=arm -mtriple=arm-linux-androideabi -relocation-model=pic | \ +; RUN: FileCheck %s --check-prefix=PIC + +; Compared with tls1.ll, emulated mode should not use __aeabi_read_tp or __tls_get_addr. + +; CHECK-NOT: _aeabi_read_tp +; CHECK-NOT: _tls_get_addr +; CHECK: __emutls_get_addr +; CHECK-NOT: __aeabi_read_tp +; CHECK-NOT: _tls_get_addr + +; PIC-NOT: _aeabi_read_tp +; PIC-NOT: _tls_get_addr +; PIC: __emutls_get_addr +; PIC-NOT: _aeabi_read_tp +; PIC-NOT: _tls_get_addr + +@i = thread_local(emulated) global i32 15 ; [#uses=2] + +define i32 @f() { +entry: + %tmp1 = load i32, i32* @i ; [#uses=1] + ret i32 %tmp1 +} + +define i32* @g() { +entry: + ret i32* @i +} Index: test/CodeGen/ARM/tls-models.ll =================================================================== --- test/CodeGen/ARM/tls-models.ll +++ test/CodeGen/ARM/tls-models.ll @@ -14,6 +14,9 @@ @external_le = external thread_local(localexec) global i32 @internal_le = internal thread_local(localexec) global i32 42 +@external_em = external thread_local(emulated) global i32 +@internal_em = internal thread_local(emulated) global i8 42 + ; ----- no model specified ----- define i32* @f1() { @@ -115,3 +118,65 @@ ; CHECK-PIC-LABEL: f8: ; CHECK-PIC: internal_le(TPOFF) } + + +; ----- emulated specified ----- + +define i32* @f9() { +entry: + ret i32* @external_em + + ; Non-PIC and PIC code will use emulated TLS as specified. + ; CHECK-NONPIC-LABEL: f9: + ; CHECK-NONPIC: __emutls_get_address + ; CHECK-NONPIC: .long __emutls_v.external_em + ; CHECK-NONPIC: .size f9, + ; CHECK-PIC-LABEL: f9: + ; CHECK-PIC: __emutls_get_address + ; CHECK-PIC: .long __emutls_v.external_em + ; CHECK-PIC: .size f9, +} + +define i8* @f10() { +entry: + ret i8* @internal_em + + ; Non-PIC and PIC code will use emulated TLS as specified. 
+ ; CHECK-NONPIC-LABEL: f10: + ; CHECK-NONPIC: __emutls_get_address + ; CHECK-NONPIC: .long __emutls_v.internal_em + ; CHECK-NONPIC: .size f10, + ; CHECK-PIC-LABEL: f10: + ; CHECK-PIC: __emutls_get_address + ; CHECK-PIC: .long __emutls_v.internal_em + ; CHECK-PIC: .size f10, +} + +; External declaration has no initializer. +; Internal definition has initializer. + +; CHECK-NONPIC-NOT: __emutls_t.external_em +; CHECK-NONPIC-NOT: __emutls_v.external_em +; CHECK-NONPIC: .align 2 +; CHECK-NONPIC: __emutls_v.internal_em: +; CHECK-NONPIC-NEXT: .long 1 +; CHECK-NONPIC-NEXT: .long 1 +; CHECK-NONPIC-NEXT: .long 0 +; CHECK-NONPIC-NEXT: .long __emutls_t.internal_em +; CHECK-NONPIC: __emutls_t.internal_em: +; CHECK-NONPIC-NEXT: .byte 42 +; CHECK-NONPIC-NOT: __emutls_t.external_em + +; __emutls_t and __emutls_v are the same for PIC and non-PIC modes. + +; CHECK-PIC-NOT: __emutls_t.external_em +; CHECK-PIC-NOT: __emutls_v.external_em +; CHECK-PIC: .align 2 +; CHECK-PIC: __emutls_v.internal_em: +; CHECK-PIC-NEXT: .long 1 +; CHECK-PIC-NEXT: .long 1 +; CHECK-PIC-NEXT: .long 0 +; CHECK-PIC-NEXT: .long __emutls_t.internal_em +; CHECK-PIC: __emutls_t.internal_em: +; CHECK-PIC-NEXT: .byte 42 +; CHECK-PIC-NOT: __emutls_t.external_em Index: test/CodeGen/ARM/tls3.ll =================================================================== --- test/CodeGen/ARM/tls3.ll +++ test/CodeGen/ARM/tls3.ll @@ -1,11 +1,44 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ ; RUN: grep "tbss" +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s %struct.anon = type { i32, i32 } -@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1] +@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1] define i32 @main() { entry: - %tmp2 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @teste, i32 0, i32 0), align 8 ; [#uses=1] - ret i32 %tmp2 + %tmp2 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @teste, i32 0, i32 0), align 8 ; [#uses=1] + ret i32 %tmp2 } + +@testx = internal thread_local(emulated) global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1] + +define i32 @foo() { +entry: + %tmp2 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @testx, i32 0, i32 0), align 8 ; [#uses=1] + ret i32 %tmp2 +} + +; CHECK-LABEL: main: +; CHECK-NOT: __emutls_get_address +; CHECK-LABEL: foo: +; CHECK: __emutls_get_address + +; CHECK-NOT: testx: + +; CHECK: .section .tbss +; CHECK-LABEL: teste: +; CHECK-NEXT: .zero 8 + +; CHECK-NOT: testx: +; CHECK-NOT: __emutls_t.testx + +; CHECK: .align 2 +; CHECK-LABEL: __emutls_v.testx: +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 0 + +; CHECK-NOT: testx: +; CHECK-NOT: __emutls_t.testx Index: test/CodeGen/Generic/emutls.ll =================================================================== --- /dev/null +++ test/CodeGen/Generic/emutls.ll @@ -0,0 +1,283 @@ +; RUN: llc < %s -mtriple=arm-linux-android -relocation-model=pic | FileCheck -check-prefix=ARM_32 %s +; RUN: llc < %s -mtriple=arm-linux-androideabi -relocation-model=pic | FileCheck -check-prefix=ARM_32 %s +; RUN: llc < %s -mtriple=aarch64-linux-android -relocation-model=pic | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -mtriple=arm-linux-androideabi -relocation-model=pic -O3 | FileCheck -check-prefix=ARM_32 %s +; RUN: llc < %s -mtriple=aarch64-linux-android -relocation-model=pic -O3 | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -mtriple=arm-linux-androideabi -O3 |
FileCheck -check-prefix=ARM_32 %s +; RUN: llc < %s -mtriple=aarch64-linux-android -O3 | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -mtriple=i686-linux-android -relocation-model=pic | FileCheck -check-prefix=X86_32 %s +; RUN: llc < %s -mtriple=x86_64-linux-android -march=x86 -relocation-model=pic | FileCheck -check-prefix=X86_32 %s +; RUN: llc < %s -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck -check-prefix=X86_64 %s +; RUN: llc < %s -mtriple=mipsel-linux-android -relocation-model=pic | FileCheck -check-prefix=MIPS_32 %s +; RUN: llc < %s -mtriple=mips64el-linux-android -relocation-model=pic | FileCheck -check-prefix=MIPS_64 %s +; RUN: llc < %s -march=ppc64 -relocation-model=pic | FileCheck %s +; RUN: llc < %s -march=ppc32 -relocation-model=pic | FileCheck %s +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck %s + +; Make sure that TLS symbols are emitted in expected order. + +@external_x = external thread_local(emulated) global i32, align 8 +@external_y = thread_local(emulated) global i8 7, align 2 +@internal_y = internal thread_local(emulated) global i64 9, align 16 + +define i32* @get_external_x() { +entry: + ret i32* @external_x +} + +define i8* @get_external_y() { +entry: + ret i8* @external_y +} + +define i64* @get_internal_y() { +entry: + ret i64* @internal_y +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; target-independent mode +; CHECK-LABEL: get_external_x: +; CHECK-NOT: _tls_get_address +; CHECK: __emutls_get_address +; CHECK-LABEL: get_external_y: +; CHECK: __emutls_get_address +; CHECK-NOT: _tls_get_address +; CHECK-LABEL: get_internal_y: + +; CHECK-NOT: __emutls_t.external_x: +; CHECK-NOT: __emutls_v.external_x: + +; CHECK-LABEL: __emutls_v.external_y: +; CHECK-LABEL: __emutls_t.external_y: +; CHECK: __emutls_t.external_y + +; CHECK-LABEL: __emutls_v.internal_y: +; CHECK-LABEL: __emutls_t.internal_y: +; CHECK: __emutls_t.internal_y + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 32-bit mode +; ARM_32-LABEL: get_external_x: +; X86_32-LABEL: get_external_x: +; MIPS-LABEL: get_external_x: + +; ARM_32: bl __emutls_get_address +; ARM_32: .long __emutls_v.external_x + +; X86_32: movl __emutls_v.external_x +; X86_32: calll __emutls_get_address + +; ARM_32-LABEL: get_external_y: +; X86_32-LABEL: get_external_y: +; MIPS_32-LABEL: get_external_y: + +; ARM_32: bl __emutls_get_address +; ARM_32: .long __emutls_v.external_y + +; X86_32: movl __emutls_v.external_y +; X86_32: calll __emutls_get_address + +; ARM_32-LABEL: get_internal_y: +; X86_32-LABEL: get_internal_y: +; MIPS_32-LABEL: get_internal_y: + +; ARM_32: bl __emutls_get_address +; ARM_32: .long __emutls_v.internal_y + +; X86_32: movl __emutls_v.internal_y +; X86_32: calll __emutls_get_address + +; MIPS_32: lw {{.+}}(__emutls_v.internal_y +; MIPS_32: lw {{.+}}call16(__emutls_get_address + +; ARM_32-NOT: __emutls_t.external_x +; X86_32-NOT: __emutls_t.external_x +; MIPS_32-NOT: __emutls_t.external_x + +; ARM_32-NOT: __emutls_v.external_x: +; X86_32-NOT: __emutls_v.external_x: +; MIPS_32-NOT: __emutls_v.external_x: + +; ARM_32: .section .data.rel.local +; X86_32: .section .data.rel.local +; MIPS_32: .section .data.rel.local + +; ARM_32: .align 2 +; X86_32: .align 4 +; MIPS_32: .align 2 + +; ARM_32-LABEL: __emutls_v.external_y: +; X86_32-LABEL: __emutls_v.external_y: +; MIPS_32-LABEL: __emutls_v.external_y: + +; ARM_32-NEXT: .long 1 +; ARM_32-NEXT: .long 2 +; ARM_32-NEXT: .long 0 +; ARM_32-NEXT: .long __emutls_t.external_y + +; X86_32-NEXT: .long 1 +; X86_32-NEXT: .long
2 +; X86_32-NEXT: .long 0 +; X86_32-NEXT: .long __emutls_t.external_y + +; ARM_32: .section .rodata, +; X86_32: .section .rodata, +; MIPS_32: .section .rodata, + +; ARM_32-LABEL: __emutls_t.external_y: +; X86_32-LABEL: __emutls_t.external_y: +; MIPS_32-LABEL: __emutls_t.external_y: + +; ARM_32-NEXT: .byte 7 +; X86_32-NEXT: .byte 7 +; MIPS_32-NEXT: .byte 7 + +; ARM_32: .section .data.rel.local +; X86_32: .section .data.rel.local +; MIPS_32: .section .data.rel.local + +; ARM_32: .align 2 +; X86_32: .align 4 +; MIPS_32: .align 2 + +; ARM_32-LABEL: __emutls_v.internal_y: +; X86_32-LABEL: __emutls_v.internal_y: +; MIPS_32-LABEL: __emutls_v.internal_y: + +; ARM_32-NEXT: .long 8 +; ARM_32-NEXT: .long 16 +; ARM_32-NEXT: .long 0 +; ARM_32-NEXT: .long __emutls_t.internal_y + +; X86_32-NEXT: .long 8 +; X86_32-NEXT: .long 16 +; X86_32-NEXT: .long 0 +; X86_32-NEXT: .long __emutls_t.internal_y + +; MIPS_32-NEXT: .4byte 8 +; MIPS_32-NEXT: .4byte 16 +; MIPS_32-NEXT: .4byte 0 +; MIPS_32-NEXT: .4byte __emutls_t.internal_y + +; ARM_32-LABEL: __emutls_t.internal_y: +; X86_32-LABEL: __emutls_t.internal_y: +; MIPS_32-LABEL: __emutls_t.internal_y: + +; ARM_32-NEXT: .long 9 +; ARM_32-NEXT: .long 0 +; X86_32-NEXT: .quad 9 +; MIPS_32-NEXT: .8byte 9 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 64-bit mode +; X86_64-LABEL: get_external_x: +; ARM_64-LABEL: get_external_x: +; MIPS_64-LABEL: get_external_x: + +; X86_64: __emutls_v.external_x +; X86_64: __emutls_get_address + +; ARM_64: __emutls_v.external_x +; ARM_64: __emutls_get_address + +; X86_64-LABEL: get_external_y: +; ARM_64-LABEL: get_external_y: +; MIPS_64-LABEL: get_external_y: + +; X86_64: __emutls_v.external_y +; X86_64: __emutls_get_address + +; ARM_64: __emutls_v.external_y +; ARM_64: __emutls_get_address + +; X86_64-LABEL: get_internal_y: +; ARM_64-LABEL: get_internal_y: +; MIPS_64-LABEL: get_internal_y: + +; X86_64: __emutls_v.internal_y +; X86_64: __emutls_get_address + +; ARM_64: __emutls_v.internal_y +; ARM_64: __emutls_get_address + +; MIPS_64: ld {{.+}}(__emutls_v.internal_y +; MIPS_64: ld {{.+}}call16(__emutls_get_address + +; ARM_64-NOT: __emutls_t.external_x +; X86_64-NOT: __emutls_t.external_x +; MIPS_64-NOT: __emutls_t.external_x + +; X86_64-NOT: __emutls_v.external_x: +; ARM_64-NOT: __emutls_v.external_x: +; MIPS_64-NOT: __emutls_v.external_x: + +; X86_64: .align 8 +; ARM_64: .align 3 + +; X86_64-LABEL: __emutls_v.external_y: +; ARM_64-LABEL: __emutls_v.external_y: +; MIPS_64-LABEL: __emutls_v.external_y: + +; X86_64-NEXT: .quad 1 +; X86_64-NEXT: .quad 2 +; X86_64-NEXT: .quad 0 +; X86_64-NEXT: .quad __emutls_t.external_y + +; ARM_64-NEXT: .xword 1 +; ARM_64-NEXT: .xword 2 +; ARM_64-NEXT: .xword 0 +; ARM_64-NEXT: .xword __emutls_t.external_y + +; X86_64-NOT: __emutls_v.external_x: +; ARM_64-NOT: __emutls_v.external_x: +; MIPS_64-NOT: __emutls_v.external_x: + +; ARM_64: .section .rodata, +; X86_64: .section .rodata, +; MIPS_64: .section .rodata, + +; X86_64-LABEL: __emutls_t.external_y: +; ARM_64-LABEL: __emutls_t.external_y: +; MIPS_64-LABEL: __emutls_t.external_y: + +; X86_64-NEXT: .byte 7 +; ARM_64-NEXT: .byte 7 +; MIPS_64-NEXT: .byte 7 + +; ARM_64: .section .data.rel.local +; X86_64: .section .data.rel.local +; MIPS_64: .section .data.rel.local + +; X86_64: .align 8 +; ARM_64: .align 3 +; MIPS_64: .align 3 + +; X86_64-LABEL: __emutls_v.internal_y: +; ARM_64-LABEL: __emutls_v.internal_y: +; MIPS_64-LABEL: __emutls_v.internal_y: + +; X86_64-NEXT: .quad 8 +; X86_64-NEXT: .quad 16 +; X86_64-NEXT: .quad 0 +; X86_64-NEXT: .quad 
__emutls_t.internal_y + +; ARM_64-NEXT: .xword 8 +; ARM_64-NEXT: .xword 16 +; ARM_64-NEXT: .xword 0 +; ARM_64-NEXT: .xword __emutls_t.internal_y + +; MIPS_64-NEXT: .8byte 8 +; MIPS_64-NEXT: .8byte 16 +; MIPS_64-NEXT: .8byte 0 +; MIPS_64-NEXT: .8byte __emutls_t.internal_y + +; ARM_64: .section .rodata, +; X86_64: .section .rodata, +; MIPS_64: .section .rodata, + +; X86_64-LABEL: __emutls_t.internal_y: +; ARM_64-LABEL: __emutls_t.internal_y: +; MIPS_64-LABEL: __emutls_t.internal_y: + +; X86_64-NEXT: .quad 9 +; ARM_64-NEXT: .xword 9 +; MIPS_64-NEXT: .8byte 9 Index: test/CodeGen/X86/emutls-pic.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/emutls-pic.ll @@ -0,0 +1,168 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s +; RUN: llc < %s -march=x86 -mtriple=i386-linux-android -relocation-model=pic | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck -check-prefix=X64 %s + +; Use my_emutls_get_address like __emutls_get_address. +@my_emutls_v_xyz = external global i8*, align 4 +declare i8* @my_emutls_get_address(i8*) + +define i32 @my_get_xyz() { +; X32-LABEL: my_get_xyz: +; X32: movl my_emutls_v_xyz@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll my_emutls_get_address@PLT +; X64-LABEL: my_get_xyz: +; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi +; X64-NEXT: callq my_emutls_get_address@PLT +; X64-NEXT: movl (%rax), %eax + +entry: + %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*)) + %0 = bitcast i8* %call to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +@i = thread_local(emulated) global i32 15 +@j = internal thread_local(emulated) global i32 42 +@k = internal thread_local(emulated) global i32 0, align 8 + +define i32 @f1() { +entry: + %tmp1 = load i32, i32* @i + ret i32 %tmp1 +} + +; X32-LABEL: f1: +; X32: movl __emutls_v.i@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X64-LABEL: f1: +; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT +; X64-NEXT: movl (%rax), %eax + +@i2 = external thread_local global i32 + +define i32* @f2() { +entry: + ret i32* @i +} + +; X32-LABEL: f2: +; X64-LABEL: f2: + + +define i32 @f3() { +entry: + %tmp1 = load i32, i32* @i ; [#uses=1] + ret i32 %tmp1 +} + +; X32-LABEL: f3: +; X64-LABEL: f3: + + +define i32* @f4() nounwind { +entry: + ret i32* @i +} + +; X32-LABEL: f4: +; X64-LABEL: f4: + + +define i32 @f5() nounwind { +entry: + %0 = load i32, i32* @j, align 4 + %1 = load i32, i32* @k, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +; X32-LABEL: f5: +; X32: movl __emutls_v.j@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X32-NEXT: movl (%eax), %esi +; X32-NEXT: movl __emutls_v.k@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X32-NEXT: addl (%eax), %esi +; X32-NEXT: movl %esi, %eax + +; X64-LABEL: f5: +; X64: movq __emutls_v.j@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT +; X64-NEXT: movl (%rax), %ebx +; X64-NEXT: movq __emutls_v.k@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT +; X64-NEXT: addl (%rax), %ebx +; X64-NEXT: movl %ebx, %eax + +;;;;; 32-bit targets + +; X32: .section 
.data.rel.local, +; X32-LABEL: __emutls_v.i: +; X32-NEXT: .long 4 +; X32-NEXT: .long 4 +; X32-NEXT: .long 0 +; X32-NEXT: .long __emutls_t.i + +; X32: .section .rodata, +; X32-LABEL: __emutls_t.i: +; X32-NEXT: .long 15 + +; X32: .section .data.rel.local, +; X32-LABEL: __emutls_v.j: +; X32-NEXT: .long 4 +; X32-NEXT: .long 4 +; X32-NEXT: .long 0 +; X32-NEXT: .long __emutls_t.j + +; X32: .section .rodata, +; X32-LABEL: __emutls_t.j: +; X32-NEXT: .long 42 + +; X32: .data +; X32-LABEL: __emutls_v.k: +; X32-NEXT: .long 4 +; X32-NEXT: .long 8 +; X32-NEXT: .long 0 +; X32-NEXT: .long 0 + +; X32-NOT: __emutls_t.k: + +;;;;; 64-bit targets + +; X64: .section .data.rel.local, +; X64-LABEL: __emutls_v.i: +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 0 +; X64-NEXT: .quad __emutls_t.i + +; X64: .section .rodata, +; X64-LABEL: __emutls_t.i: +; X64-NEXT: .long 15 + +; X64: .section .data.rel.local, +; X64-LABEL: __emutls_v.j: +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 0 +; X64-NEXT: .quad __emutls_t.j + +; X64: .section .rodata, +; X64-LABEL: __emutls_t.j: +; X64-NEXT: .long 42 + +; X64: .data +; X64-LABEL: __emutls_v.k: +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 8 +; X64-NEXT: .quad 0 +; X64-NEXT: .quad 0 + +; X64-NOT: __emutls_t.k: Index: test/CodeGen/X86/emutls-pie.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/emutls-pie.ll @@ -0,0 +1,131 @@ +; RUN: llc < %s -march=x86 -mcpu=generic -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \ +; RUN: | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -march=x86-64 -mcpu=generic -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \ +; RUN: | FileCheck -check-prefix=X64 %s +; RUN: llc < %s -march=x86 -mcpu=generic -mtriple=i386-linux-android -relocation-model=pic -enable-pie \ +; RUN: | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -march=x86-64 -mcpu=generic -mtriple=x86_64-linux-android -relocation-model=pic -enable-pie \ +; RUN: | FileCheck -check-prefix=X64 %s + +; Use my_emutls_get_address like __emutls_get_address. 
+@my_emutls_v_xyz = external global i8*, align 4 +declare i8* @my_emutls_get_address(i8*) + +define i32 @my_get_xyz() { +; X32-LABEL: my_get_xyz: +; X32: movl my_emutls_v_xyz@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll my_emutls_get_address@PLT +; X32-NEXT: movl (%eax), %eax +; X32-NEXT: addl $8, %esp +; X32-NEXT: popl %ebx +; X32-NEXT: retl +; X64-LABEL: my_get_xyz: +; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi +; X64-NEXT: callq my_emutls_get_address@PLT +; X64-NEXT: movl (%rax), %eax +; X64-NEXT: popq %rdx +; X64-NEXT: retq + +entry: + %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*)) + %0 = bitcast i8* %call to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +@i = thread_local(emulated) global i32 15 +@i2 = external thread_local(emulated) global i32 + +define i32 @f1() { +; X32-LABEL: f1: +; X32: movl __emutls_v.i@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X32-NEXT: movl (%eax), %eax +; X32-NEXT: addl $8, %esp +; X32-NEXT: popl %ebx +; X32-NEXT: retl +; X64-LABEL: f1: +; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT +; X64-NEXT: movl (%rax), %eax +; X64-NEXT: popq %rdx +; X64-NEXT: retq + +entry: + %tmp1 = load i32, i32* @i + ret i32 %tmp1 +} + +define i32* @f2() { +; X32-LABEL: f2: +; X32: movl __emutls_v.i@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X64-LABEL: f2: +; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT + +entry: + ret i32* @i +} + +define i32 @f3() { +; X32-LABEL: f3: +; X32: movl __emutls_v.i2@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X64-LABEL: f3: +; X64: movq __emutls_v.i2@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT + +entry: + %tmp1 = load i32, i32* @i2 + ret i32 %tmp1 +} + +define i32* @f4() { +; X32-LABEL: f4: +; X32: movl __emutls_v.i2@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X64-LABEL: f4: +; X64: movq __emutls_v.i2@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT + +entry: + ret i32* @i2 +} + +;;;;; 32-bit targets + +; X32: .section .data.rel.local, +; X32-LABEL: __emutls_v.i: +; X32-NEXT: .long 4 +; X32-NEXT: .long 4 +; X32-NEXT: .long 0 +; X32-NEXT: .long __emutls_t.i + +; X32: .section .rodata, +; X32-LABEL: __emutls_t.i: +; X32-NEXT: .long 15 + +; X32-NOT: __emutls_v.i2 +; X32-NOT: __emutls_t.i2 + +;;;;; 64-bit targets + +; X64: .section .data.rel.local, +; X64-LABEL: __emutls_v.i: +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 0 +; X64-NEXT: .quad __emutls_t.i + +; X64: .section .rodata, +; X64-LABEL: __emutls_t.i: +; X64-NEXT: .long 15 + +; X64-NOT: __emutls_v.i2 +; X64-NOT: __emutls_t.i2 Index: test/CodeGen/X86/emutls.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/emutls.ll @@ -0,0 +1,347 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64 %s +; RUN: llc < %s -march=x86 -mtriple=x86-linux-android | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-android | FileCheck -check-prefix=X64 %s + +; Copied from tls.ll; emulated TLS model is not implemented +; for *-pc-win32 and *-pc-windows targets yet.
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+; X32-LABEL: my_get_xyz:
+; X32: movl $my_emutls_v_xyz, (%esp)
+; X32-NEXT: calll my_emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+; X64-LABEL: my_get_xyz:
+; X64: movl $my_emutls_v_xyz, %edi
+; X64-NEXT: callq my_emutls_get_address
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rdx
+; X64-NEXT: retq
+
+entry:
+  %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+  %0 = bitcast i8* %call to i32*
+  %1 = load i32, i32* %0, align 4
+  ret i32 %1
+}
+
+@i1 = thread_local(emulated) global i32 15
+@i2 = external thread_local(emulated) global i32
+@i3 = internal thread_local(emulated) global i32 15
+@i4 = hidden thread_local(emulated) global i32 15
+@i5 = external hidden thread_local(emulated) global i32
+@s1 = thread_local(emulated) global i16 15
+@b1 = thread_local(emulated) global i8 0
+
+define i32 @f1() {
+; X32-LABEL: f1:
+; X32: movl $__emutls_v.i1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+; X64-LABEL: f1:
+; X64: movl $__emutls_v.i1, %edi
+; X64-NEXT: callq __emutls_get_address
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rdx
+; X64-NEXT: retq
+
+entry:
+  %tmp1 = load i32, i32* @i1
+  ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32-LABEL: f2:
+; X32: movl $__emutls_v.i1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+; X64-LABEL: f2:
+; X64: movl $__emutls_v.i1, %edi
+; X64-NEXT: callq __emutls_get_address
+; X64-NEXT: popq %rdx
+; X64-NEXT: retq
+
+entry:
+  ret i32* @i1
+}
+
+define i32 @f3() nounwind {
+; X32-LABEL: f3:
+; X32: movl $__emutls_v.i2, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  %tmp1 = load i32, i32* @i2
+  ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32-LABEL: f4:
+; X32: movl $__emutls_v.i2, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  ret i32* @i2
+}
+
+define i32 @f5() nounwind {
+; X32-LABEL: f5:
+; X32: movl $__emutls_v.i3, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  %tmp1 = load i32, i32* @i3
+  ret i32 %tmp1
+}
+
+define i32* @f6() {
+; X32-LABEL: f6:
+; X32: movl $__emutls_v.i3, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  ret i32* @i3
+}
+
+define i32 @f7() {
+; X32-LABEL: f7:
+; X32: movl $__emutls_v.i4, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  %tmp1 = load i32, i32* @i4
+  ret i32 %tmp1
+}
+
+define i32* @f8() {
+; X32-LABEL: f8:
+; X32: movl $__emutls_v.i4, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  ret i32* @i4
+}
+
+define i32 @f9() {
+; X32-LABEL: f9:
+; X32: movl $__emutls_v.i5, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  %tmp1 = load i32, i32* @i5
+  ret i32 %tmp1
+}
+
+define i32* @f10() {
+; X32-LABEL: f10:
+; X32: movl $__emutls_v.i5, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  ret i32* @i5
+}
+
+define i16 @f11() {
+; X32-LABEL: f11:
+; X32: movl $__emutls_v.s1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movzwl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  %tmp1 = load i16, i16* @s1
+  ret i16 %tmp1
+}
+
+define i32 @f12() {
+; X32-LABEL: f12:
+; X32: movl $__emutls_v.s1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movswl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  %tmp1 = load i16, i16* @s1
+  %tmp2 = sext i16 %tmp1 to i32
+  ret i32 %tmp2
+}
+
+define i8 @f13() {
+; X32-LABEL: f13:
+; X32: movl $__emutls_v.b1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movb (%eax), %al
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  %tmp1 = load i8, i8* @b1
+  ret i8 %tmp1
+}
+
+define i32 @f14() {
+; X32-LABEL: f14:
+; X32: movl $__emutls_v.b1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movsbl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+  %tmp1 = load i8, i8* @b1
+  %tmp2 = sext i8 %tmp1 to i32
+  ret i32 %tmp2
+}
+
+;;;;;;;;;;;;;; 32-bit __emutls_v. and __emutls_t.
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.i1:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i1
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.i1:
+; X32-NEXT: .long 15
+
+; X32-NOT: __emutls_v.i2
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.i3:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i3
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.i3:
+; X32-NEXT: .long 15
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.i4:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i4
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.i4:
+; X32-NEXT: .long 15
+
+; X32-NOT: __emutls_v.i5:
+; X32 .hidden __emutls_v.i5
+; X32-NOT: __emutls_v.i5:
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.s1:
+; X32-NEXT: .long 2
+; X32-NEXT: .long 2
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.s1
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.s1:
+; X32-NEXT: .short 15
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.b1:
+; X32-NEXT: .long 1
+; X32-NEXT: .long 1
+; X32-NEXT: .long 0
+; X32-NEXT: .long 0
+
+; X32-NOT: __emutls_t.b1
+
+;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t.
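+; A layout note (our reading of the checks in this file, not an ABI
+; spec): each __emutls_v.* control variable holds four pointer-sized
+; words: the variable's size, its alignment, a zero slot for the
+; runtime to fill in, and a pointer to the __emutls_t.* initializer
+; image. A zero-initialized variable such as b1 gets a null image
+; pointer and no __emutls_t.* symbol at all.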
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.i1:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i1
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.i1:
+; X64-NEXT: .long 15
+
+; X64-NOT: __emutls_v.i2
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.i3:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i3
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.i3:
+; X64-NEXT: .long 15
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.i4:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i4
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.i4:
+; X64-NEXT: .long 15
+
+; X64-NOT: __emutls_v.i5:
+; X64 .hidden __emutls_v.i5
+; X64-NOT: __emutls_v.i5:
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.s1:
+; X64-NEXT: .quad 2
+; X64-NEXT: .quad 2
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.s1
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.s1:
+; X64-NEXT: .short 15
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.b1:
+; X64-NEXT: .quad 1
+; X64-NEXT: .quad 1
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad 0
+
+; X64-NOT: __emutls_t.b1
Index: test/CodeGen/X86/fast-isel-emutls.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/fast-isel-emutls.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | FileCheck %s
+; PR3654
+
+@v = thread_local(emulated) global i32 0
+define i32 @f() nounwind {
+entry:
+  %t = load i32, i32* @v
+  %s = add i32 %t, 1
+  ret i32 %s
+}
+
+; CHECK-LABEL: f:
+; CHECK: movl __emutls_v.v@GOT(%ebx), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: calll __emutls_get_address@PLT
+; CHECK-NEXT: movl (%eax), %eax
+
+@alias = internal alias i32* @v
+define i32 @f_alias() nounwind {
+entry:
+  %t = load i32, i32* @v
+  %s = add i32 %t, 1
+  ret i32 %s
+}
+
+; CHECK-LABEL: f_alias:
+; CHECK: movl __emutls_v.v@GOT(%ebx), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: calll __emutls_get_address@PLT
+; CHECK-NEXT: movl (%eax), %eax
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+entry:
+  %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+  %0 = bitcast i8* %call to i32*
+  %1 = load i32, i32* %0, align 4
+  ret i32 %1
+}
+
+; CHECK-LABEL: my_get_xyz:
+; CHECK: movl my_emutls_v_xyz@GOT(%ebx), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: calll my_emutls_get_address@PLT
+; CHECK-NEXT: movl (%eax), %eax
Index: test/CodeGen/X86/tls-android-negative.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/tls-android-negative.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -march=x86 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck %s
+
+; Make sure that some symbols are not emitted in the emulated TLS model.
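+; (In short: declarations get no __emutls_v.* definition, and variables
+; with a zero or missing initializer get no __emutls_t.* image; the
+; CHECK-NOT lines below pin each of these down.)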
+
+@external_x = external thread_local(emulated) global i32
+@external_y = thread_local(emulated) global i32 7
+@internal_y = internal thread_local(emulated) global i32 9
+@internal_y0 = internal thread_local(emulated) global i32 0
+
+define i32* @get_external_x() {
+entry:
+  ret i32* @external_x
+}
+
+define i32* @get_external_y() {
+entry:
+  ret i32* @external_y
+}
+
+define i32* @get_internal_y() {
+entry:
+  ret i32* @internal_y
+}
+
+define i32* @get_internal_y0() {
+entry:
+  ret i32* @internal_y0
+}
+
+; no direct access to emulated TLS variables.
+; no definition of emulated TLS variables.
+; no initializer for external TLS variables, __emutls_t.external_x
+; no initializer for 0-initialized TLS variables, __emutls_t.internal_y0
+; not global linkage for __emutls_t.external_y
+
+; CHECK-NOT: external_x@TLS
+; CHECK-NOT: external_y@TLS
+; CHECK-NOT: internal_y@TLS
+; CHECK-NOT: .size external_x
+; CHECK-NOT: .size external_y
+; CHECK-NOT: .size internal_y
+; CHECK-NOT: .size internal_y0
+; CHECK-NOT: __emutls_v.external_x:
+; CHECK-NOT: __emutls_t.external_x:
+; CHECK-NOT: __emutls_t.internal_y0:
+; CHECK-NOT: global __emutls_t.external_y
+; CHECK-NOT: global __emutls_v.internal_y
+; CHECK-NOT: global __emutls_v.internal_y0
+
+; CHECK: __emutls_t.external_y
+
+; CHECK-NOT: external_x@TLS
+; CHECK-NOT: external_y@TLS
+; CHECK-NOT: internal_y@TLS
+; CHECK-NOT: .size external_x
+; CHECK-NOT: .size external_y
+; CHECK-NOT: .size internal_y
+; CHECK-NOT: .size internal_y0
+; CHECK-NOT: __emutls_v.external_x:
+; CHECK-NOT: __emutls_t.external_x:
+; CHECK-NOT: __emutls_t.internal_y0:
+; CHECK-NOT: global __emutls_t.external_y
+; CHECK-NOT: global __emutls_v.internal_y
+; CHECK-NOT: global __emutls_v.internal_y0
Index: test/CodeGen/X86/tls-android.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/tls-android.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -march=x86 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck -check-prefix=X64 %s
+
+; Make sure that TLS symbols are emitted in the expected order.
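+; (Concretely, per the checks below: each emitted __emutls_v.* control
+; variable is immediately followed by its __emutls_t.* initializer
+; image, and no definition at all is emitted for the merely-declared
+; external_x.)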
+
+@external_x = external thread_local(emulated) global i32
+@external_y = thread_local(emulated) global i32 7
+@internal_y = internal thread_local(emulated) global i32 9
+
+define i32* @get_external_x() {
+entry:
+  ret i32* @external_x
+}
+
+define i32* @get_external_y() {
+entry:
+  ret i32* @external_y
+}
+
+define i32* @get_internal_y() {
+entry:
+  ret i32* @internal_y
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 32-bit mode
+; CHECK-LABEL: get_external_x:
+; CHECK: __emutls_v.external_x
+; CHECK: __emutls_get_address
+
+; CHECK-LABEL: get_external_y:
+; CHECK: __emutls_v.external_y
+; CHECK: __emutls_get_address
+
+; CHECK-LABEL: get_internal_y:
+; CHECK: __emutls_v.internal_y
+; CHECK: __emutls_get_address
+
+; CHECK-NOT: __emutls_v.external_x:
+
+; CHECK: .align 4
+; CHECK-LABEL: __emutls_v.external_y:
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long __emutls_t.external_y
+; CHECK-LABEL: __emutls_t.external_y:
+; CHECK-NEXT: .long 7
+
+; CHECK: .align 4
+; CHECK-LABEL: __emutls_v.internal_y:
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long __emutls_t.internal_y
+; CHECK-LABEL: __emutls_t.internal_y:
+; CHECK-NEXT: .long 9
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 64-bit mode
+; X64-LABEL: get_external_x:
+; X64: __emutls_v.external_x
+; X64: __emutls_get_address
+
+; X64-LABEL: get_external_y:
+; X64: __emutls_v.external_y
+; X64: __emutls_get_address
+
+; X64-LABEL: get_internal_y:
+; X64: __emutls_v.internal_y
+; X64: __emutls_get_address
+
+; X64-NOT: __emutls_v.external_x:
+
+; X64: .align 8
+; X64-LABEL: __emutls_v.external_y:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.external_y
+; X64-LABEL: __emutls_t.external_y:
+; X64-NEXT: .long 7
+
+; X64: .align 8
+; X64-LABEL: __emutls_v.internal_y:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.internal_y
+; X64-LABEL: __emutls_t.internal_y:
+; X64-NEXT: .long 9
Index: test/CodeGen/X86/tls-models.ll
===================================================================
--- test/CodeGen/X86/tls-models.ll
+++ test/CodeGen/X86/tls-models.ll
@@ -18,6 +18,8 @@
 @external_le = external thread_local(localexec) global i32
 @internal_le = internal thread_local(localexec) global i32 42
 
+; See test cases for the emulated model in emutls.ll, emutls-pic.ll and emutls-pie.ll.
+
 ; ----- no model specified -----
 
 define i32* @f1() {
Index: test/DebugInfo/ARM/tls.ll
===================================================================
--- test/DebugInfo/ARM/tls.ll
+++ test/DebugInfo/ARM/tls.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -O0 -filetype=asm -mtriple=armv7-linux-gnuehabi < %s | FileCheck %s
+; TODO: add test cases with thread_local(emulated), debug info.
 ;
 ; Generated with clang with source
 ; __thread int x;
Index: test/DebugInfo/X86/tls.ll
===================================================================
--- test/DebugInfo/X86/tls.ll
+++ test/DebugInfo/X86/tls.ll
@@ -16,6 +16,8 @@
 ; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-freebsd \
 ; RUN:   | FileCheck --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=GNUOP %s
 
+; TODO: add test cases with thread_local(emulated), debug info.
+
+
 ; FIXME: add relocation and DWARF expression support to llvm-dwarfdump & use
 ; that here instead of raw assembly printing
Index: test/Transforms/GlobalOpt/emutls.ll
===================================================================
--- /dev/null
+++ test/Transforms/GlobalOpt/emutls.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+; Identical to tls.ll, but uses the emulated TLS model.
+
+declare void @wait()
+declare void @signal()
+declare void @start_thread(void ()*)
+
+@x = internal thread_local(emulated) global [100 x i32] zeroinitializer, align 16
+@ip = internal global i32* null, align 8
+
+; PR14309: GlobalOpt would think that the value of @ip is always the address of
+; x[1]. However, that address is different for different threads, so @ip cannot
+; be replaced with a constant.
+
+define i32 @f() {
+entry:
+  ; Set @ip to point to x[1] for thread 1.
+  store i32* getelementptr inbounds ([100 x i32], [100 x i32]* @x, i64 0, i64 1), i32** @ip, align 8
+
+  ; Run g on a new thread.
+  tail call void @start_thread(void ()* @g) nounwind
+  tail call void @wait() nounwind
+
+  ; Reset x[1] for thread 1.
+  store i32 0, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @x, i64 0, i64 1), align 4
+
+  ; Read the value of @ip, which now points at x[1] for thread 2.
+  %0 = load i32*, i32** @ip, align 8
+
+  %1 = load i32, i32* %0, align 4
+  ret i32 %1
+
+; CHECK-LABEL: @f(
+; Make sure that the load from @ip hasn't been removed.
+; CHECK: load i32*, i32** @ip
+; CHECK: ret
+}
+
+define internal void @g() nounwind uwtable {
+entry:
+  ; Set @ip to point to x[1] for thread 2.
+  store i32* getelementptr inbounds ([100 x i32], [100 x i32]* @x, i64 0, i64 1), i32** @ip, align 8
+
+  ; Store 50 in x[1] for thread 2.
+  store i32 50, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @x, i64 0, i64 1), align 4
+
+  tail call void @signal() nounwind
+  ret void
+
+; CHECK-LABEL: @g(
+; Make sure that the store to @ip hasn't been removed.
+; CHECK: store {{.*}} @ip
+; CHECK: ret
+}
Index: utils/vim/syntax/llvm.vim
===================================================================
--- utils/vim/syntax/llvm.vim
+++ utils/vim/syntax/llvm.vim
@@ -41,7 +41,7 @@
 syn keyword llvmKeyword arm_aapcscc arm_apcscc asm atomic available_externally
 syn keyword llvmKeyword blockaddress byval c catch cc ccc cleanup coldcc common
 syn keyword llvmKeyword constant datalayout declare default define deplibs
-syn keyword llvmKeyword distinct dllexport dllimport except extern_weak external
+syn keyword llvmKeyword distinct dllexport dllimport emulated except extern_weak external
 syn keyword llvmKeyword externally_initialized fastcc filter gc global hidden
 syn keyword llvmKeyword initialexec inlinehint inreg intel_ocl_bicc inteldialect
 syn keyword llvmKeyword internal linkonce linkonce_odr localdynamic localexec