Index: llvm/trunk/docs/LangRef.rst =================================================================== --- llvm/trunk/docs/LangRef.rst +++ llvm/trunk/docs/LangRef.rst @@ -494,6 +494,9 @@ A model can also be specified in a alias, but then it only governs how the alias is accessed. It will not have any effect in the aliasee. +For platforms without linker support of ELF TLS model, the -femulated-tls +flag can be used to generate GCC compatible emulated TLS code. + .. _namedtypes: Structure Types Index: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h +++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h @@ -238,6 +238,11 @@ /// void EmitJumpTableInfo(); + /// Emit the control variable for an emulated TLS variable. + virtual void EmitEmulatedTLSControlVariable(const GlobalVariable *GV, + MCSymbol *EmittedSym, + bool AllZeroInitValue); + /// Emit the specified global variable to the .s file. virtual void EmitGlobalVariable(const GlobalVariable *GV); Index: llvm/trunk/include/llvm/CodeGen/CommandFlags.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/CommandFlags.h +++ llvm/trunk/include/llvm/CodeGen/CommandFlags.h @@ -219,6 +219,10 @@ cl::desc("Emit functions into separate sections"), cl::init(false)); +cl::opt EmulatedTLS("emulated-tls", + cl::desc("Use emulated TLS model"), + cl::init(false)); + cl::opt UniqueSectionNames("unique-section-names", cl::desc("Give unique names to every section"), cl::init(true)); @@ -260,6 +264,7 @@ Options.DataSections = DataSections; Options.FunctionSections = FunctionSections; Options.UniqueSectionNames = UniqueSectionNames; + Options.EmulatedTLS = EmulatedTLS; Options.MCOptions = InitMCTargetOptionsFromFlags(); Options.JTType = JTableType; Index: llvm/trunk/include/llvm/MC/MCObjectFileInfo.h =================================================================== --- llvm/trunk/include/llvm/MC/MCObjectFileInfo.h +++ llvm/trunk/include/llvm/MC/MCObjectFileInfo.h @@ -216,6 +216,7 @@ MCSection *getTextSection() const { return TextSection; } MCSection *getDataSection() const { return DataSection; } MCSection *getBSSSection() const { return BSSSection; } + MCSection *getReadOnlySection() const { return ReadOnlySection; } MCSection *getLSDASection() const { return LSDASection; } MCSection *getCompactUnwindSection() const { return CompactUnwindSection; } MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } Index: llvm/trunk/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h +++ llvm/trunk/include/llvm/Target/TargetLowering.h @@ -2821,6 +2821,10 @@ virtual bool useLoadStackGuardNode() const { return false; } + + /// Lower TLS global address SDNode for target independent emulated TLS model. 
+ virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; }; /// Given an LLVM IR type and return type attributes, compute the return value Index: llvm/trunk/include/llvm/Target/TargetOptions.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetOptions.h +++ llvm/trunk/include/llvm/Target/TargetOptions.h @@ -72,7 +72,7 @@ UseInitArray(false), DisableIntegratedAS(false), CompressDebugSections(false), FunctionSections(false), DataSections(false), UniqueSectionNames(true), TrapUnreachable(false), - FloatABIType(FloatABI::Default), + EmulatedTLS(false), FloatABIType(FloatABI::Default), AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()), JTType(JumpTable::Single), ThreadModel(ThreadModel::POSIX) {} @@ -172,6 +172,10 @@ /// Emit target-specific trap instruction for 'unreachable' IR instructions. unsigned TrapUnreachable : 1; + /// EmulatedTLS - This flag enables the emulated TLS model, using the emutls + /// function in the runtime library. + unsigned EmulatedTLS : 1; + /// FloatABIType - This setting is set by -float-abi=xxx option is specfied /// on the command line. This setting may either be Default, Soft, or Hard. /// Default selects the target's default behavior. Soft selects the ABI for @@ -231,6 +235,7 @@ ARE_EQUAL(PositionIndependentExecutable) && ARE_EQUAL(UseInitArray) && ARE_EQUAL(TrapUnreachable) && + ARE_EQUAL(EmulatedTLS) && ARE_EQUAL(FloatABIType) && ARE_EQUAL(AllowFPOpFusion) && ARE_EQUAL(Reciprocals) && Index: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -343,8 +343,58 @@ return TM.getSymbol(GV, *Mang); } +static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName()); +} + +static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName()); +} + +/// EmitEmulatedTLSControlVariable - Emit the control variable for an emulated TLS variable. +void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV, + MCSymbol *EmittedSym, + bool AllZeroInitValue) { + // If there is an init value, use the .data.rel.local section; + // otherwise use the .data section. + MCSection *TLSVarSection = const_cast<MCSection *>( + (GV->hasInitializer() && !AllZeroInitValue) + ?
getObjFileLowering().getDataRelLocalSection() + : getObjFileLowering().getDataSection()); + OutStreamer->SwitchSection(TLSVarSection); + MCSymbol *GVSym = getSymbol(GV); + EmitLinkage(GV, EmittedSym); // same linkage as GV + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + unsigned AlignLog = getGVAlignmentLog2(GV, DL); + unsigned WordSize = DL.getPointerSize(); + unsigned Alignment = DL.getPointerABIAlignment(); + EmitAlignment(Log2_32(Alignment)); + OutStreamer->EmitLabel(EmittedSym); + OutStreamer->EmitIntValue(Size, WordSize); + OutStreamer->EmitIntValue((1 << AlignLog), WordSize); + OutStreamer->EmitIntValue(0, WordSize); + if (GV->hasInitializer() && !AllZeroInitValue) { + OutStreamer->EmitSymbolValue( + getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize); + } else + OutStreamer->EmitIntValue(0, WordSize); + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->emitELFSize(cast(EmittedSym), + MCConstantExpr::create(4 * WordSize, OutContext)); + OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable. +} + /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + bool IsEmuTLSVar = + GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal && + TM.Options.EmulatedTLS; + assert((!IsEmuTLSVar || getObjFileLowering().getDataRelLocalSection()) && + "Need relocatable local section for emulated TLS variables"); + assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && + "No emulated TLS variables in the common section"); + if (GV->hasInitializer()) { // Check to see if this is a special global used by LLVM, if so, emit it. if (EmitSpecialLLVMGlobal(GV)) @@ -355,7 +405,9 @@ if (GlobalGOTEquivs.count(getSymbol(GV))) return; - if (isVerbose()) { + if (isVerbose() && !IsEmuTLSVar) { + // When printing the control variable __emutls_v.*, + // we don't need to print the original TLS variable name. GV->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, GV->getParent()); OutStreamer->GetCommentOS() << '\n'; @@ -363,7 +415,12 @@ } MCSymbol *GVSym = getSymbol(GV); - EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); + MCSymbol *EmittedSym = IsEmuTLSVar ? + getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSControlSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedSym. + + EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -374,7 +431,7 @@ "' is already defined"); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); @@ -386,6 +443,18 @@ // sections and expected to be contiguous (e.g. ObjC metadata). 
unsigned AlignLog = getGVAlignmentLog2(GV, DL); + bool AllZeroInitValue = false; + const Constant *InitValue = GV->getInitializer(); + if (isa(InitValue)) + AllZeroInitValue = true; + else { + const ConstantInt *InitIntValue = dyn_cast(InitValue); + if (InitIntValue && InitIntValue->isZero()) + AllZeroInitValue = true; + } + if (IsEmuTLSVar) + EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue); + for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); HI.Handler->setSymbolSize(GVSym, Size); @@ -393,6 +462,8 @@ // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { + assert(!(IsEmuTLSVar && GVKind.isCommon()) && + "No emulated TLS variables in the common section"); if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. unsigned Align = 1 << AlignLog; @@ -437,12 +508,21 @@ return; } - MCSection *TheSection = + if (IsEmuTLSVar && AllZeroInitValue) + return; // No need of initialization values. + + MCSymbol *EmittedInitSym = IsEmuTLSVar ? + getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSInitSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedInitSym. + + MCSection *TheSection = IsEmuTLSVar ? + getObjFileLowering().getReadOnlySection() : getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // Handle the zerofill directive on darwin, which is a special form of BSS // emission. - if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { + if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. // .globl _foo @@ -462,7 +542,7 @@ // TLOF class. This will also make it more obvious that stuff like // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho // specific code. - if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) { // Emit the .tbss symbol MCSymbol *MangSym = OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); @@ -506,16 +586,18 @@ OutStreamer->SwitchSection(TheSection); - EmitLinkage(GV, GVSym); + // emutls_t.* symbols are only used in the current compilation unit. + if (!IsEmuTLSVar) + EmitLinkage(GV, EmittedInitSym); EmitAlignment(AlignLog, GV); - OutStreamer->EmitLabel(GVSym); + OutStreamer->EmitLabel(EmittedInitSym); EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer->emitELFSize(cast(GVSym), + OutStreamer->emitELFSize(cast(EmittedInitSym), MCConstantExpr::create(Size, OutContext)); OutStreamer->AddBlankLine(); Index: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp =================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -151,28 +151,32 @@ DIELoc *Loc = new (DIEValueAllocator) DIELoc; const MCSymbol *Sym = Asm->getSymbol(Global); if (Global->isThreadLocal()) { - // FIXME: Make this work with -gsplit-dwarf. 
- unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); - // Based on GCC's support for TLS: - if (!DD->useSplitDwarf()) { - // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, - PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); - // 2) containing the (relocated) offset of the TLS variable - // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + if (Asm->TM.Options.EmulatedTLS) { + // TODO: add debug info for emulated thread local mode. } else { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(*Loc, dwarf::DW_FORM_udata, - DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } - // 3) followed by an OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, - DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address - : dwarf::DW_OP_form_tls_address); } else { DD->addArangeLabel(SymbolCU(this, Sym)); addOpAddress(*Loc, Sym); Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3044,3 +3044,46 @@ DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); return true; } + +//===----------------------------------------------------------------------===// +// Implementation of Emulated TLS Model +//===----------------------------------------------------------------------===// + +SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + // Access to the address of TLS variable xyz is lowered to a function call: + // __emutls_get_address( address of global variable named "__emutls_v.xyz" ) + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext()); + SDLoc dl(GA); + + ArgListTy Args; + ArgListEntry Entry; + std::string NameString = ("__emutls_v."
+ GA->getGlobal()->getName()).str(); + Module *VariableModule = const_cast(GA->getGlobal()->getParent()); + StringRef EmuTlsVarName(NameString); + GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName); + if (!EmuTlsVar) + EmuTlsVar = dyn_cast_or_null( + VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType)); + Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); + Entry.Ty = VoidPtrType; + Args.push_back(Entry); + + SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()); + CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0); + std::pair CallResult = LowerCallTo(CLI); + + // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. + // At last for X86 targets, maybe good for other targets too? + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setAdjustsStack(true); // Is this only for X86 target? + MFI->setHasCalls(true); + + assert((GA->getOffset() == 0) && + "Emulated TLS must have zero offset in GlobalAddressSDNode"); + return CallResult.first; +} Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3404,6 +3404,10 @@ const GlobalAddressSDNode *GA = cast(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + if (!EnableAArch64ELFLocalDynamicTLSGeneration) { if (Model == TLSModel::LocalDynamic) Model = TLSModel::GeneralDynamic; Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -2583,6 +2583,8 @@ assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp +++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp @@ -1723,6 +1723,9 @@ // Local Exec TLS Model. GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2085,6 +2085,9 @@ // large models could be added if users need it, at the cost of // additional complexity. 
GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); Index: llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp +++ llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp @@ -1872,6 +1872,9 @@ SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); Index: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2485,6 +2485,8 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(Node, DAG); SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -11645,6 +11645,8 @@ auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Subtarget->isTargetELF()) { + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: Index: llvm/trunk/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll @@ -1,4 +1,7 @@ -; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=NOEMU %s +; RUN: llc -emulated-tls -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=EMU %s ; If the .tlsdesccall and blr parts are emitted completely separately (even with ; glue) then LLVM will separate them quite happily (with a spill at O0, hence @@ -13,6 +16,40 @@ %val = load i32, i32* @general_dynamic_var ret i32 %val -; CHECK: .tlsdesccall general_dynamic_var -; CHECK-NEXT: blr {{x[0-9]+}} +; NOEMU: .tlsdesccall general_dynamic_var +; NOEMU-NEXT: blr {{x[0-9]+}} +; NOEMU-NOT: __emutls_v.general_dynamic_var: + +; EMU: adrp{{.+}}__emutls_v.general_dynamic_var +; EMU: bl __emutls_get_address + +; EMU-NOT: __emutls_v.general_dynamic_var +; EMU-NOT: __emutls_t.general_dynamic_var +} + +@emulated_init_var = thread_local global i32 37, align 8 + +define i32 @test_emulated_init() { +; COMMON-LABEL: test_emulated_init: + + %val = load i32, i32* @emulated_init_var + ret i32 %val + +; EMU: adrp{{.+}}__emutls_v.emulated_init_var +; EMU: bl __emutls_get_address + +; EMU-NOT: __emutls_v.general_dynamic_var: + +; EMU: .align 3 +; EMU-LABEL: __emutls_v.emulated_init_var: +; EMU-NEXT: .xword 4 
+; EMU-NEXT: .xword 8 +; EMU-NEXT: .xword 0 +; EMU-NEXT: .xword __emutls_t.emulated_init_var + +; EMU-LABEL: __emutls_t.emulated_init_var: +; EMU-NEXT: .word 37 } + +; CHECK-NOT: __emutls_v.general_dynamic_var: +; EMU-NOT: __emutls_t.general_dynamic_var Index: llvm/trunk/test/CodeGen/AArch64/emutls.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/emutls.ll +++ llvm/trunk/test/CodeGen/AArch64/emutls.ll @@ -0,0 +1,368 @@ +; RUN: llc -emulated-tls -mtriple=arm-linux-android \ +; RUN: -relocation-model=pic < %s | FileCheck -check-prefix=ARM32 %s +; RUN: llc -emulated-tls -mtriple=aarch64-linux-android \ +; RUN: -relocation-model=pic < %s | FileCheck -check-prefix=ARM64 %s + +; Copied from X86/emutls.ll + +; Use my_emutls_get_address like __emutls_get_address. +@my_emutls_v_xyz = external global i8*, align 4 +declare i8* @my_emutls_get_address(i8*) + +define i32 @my_get_xyz() { +; ARM32-LABEL: my_get_xyz: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl my_emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] +; ARM64-LABEL: my_get_xyz: +; ARM64: adrp x0, :got:my_emutls_v_xyz +; ARM64-NEXT: ldr x0, [x0, :got_lo12:my_emutls_v_xyz] +; ARM64-NEXT: bl my_emutls_get_address +; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*)) + %0 = bitcast i8* %call to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +@i1 = thread_local global i32 15 +@i2 = external thread_local global i32 +@i3 = internal thread_local global i32 15 +@i4 = hidden thread_local global i32 15 +@i5 = external hidden thread_local global i32 +@s1 = thread_local global i16 15 +@b1 = thread_local global i8 0 + +define i32 @f1() { +; ARM32-LABEL: f1: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] +; ARM64-LABEL: f1: +; ARM64: adrp x0, :got:__emutls_v.i1 +; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] +; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + %tmp1 = load i32, i32* @i1 + ret i32 %tmp1 +} + +define i32* @f2() { +; ARM32-LABEL: f2: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop +; ARM64-LABEL: f2: +; ARM64: adrp x0, :got:__emutls_v.i1 +; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] +; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + ret i32* @i1 +} + +define i32 @f3() nounwind { +; ARM32-LABEL: f3: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] + +entry: + %tmp1 = load i32, i32* @i2 + ret i32 %tmp1 +} + +define i32* @f4() { +; ARM32-LABEL: f4: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop + +entry: + ret i32* @i2 +} + +define i32 @f5() nounwind { +; ARM32-LABEL: f5: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] + +entry: + %tmp1 = load i32, i32* @i3 + ret i32 %tmp1 +} + +define i32* @f6() { +; ARM32-LABEL: f6: +; 
ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop + +entry: + ret i32* @i3 +} + +define i32 @f7() { +; ARM32-LABEL: f7: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] + +entry: + %tmp1 = load i32, i32* @i4 + ret i32 %tmp1 +} + +define i32* @f8() { +; ARM32-LABEL: f8: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop + +entry: + ret i32* @i4 +} + +define i32 @f9() { +; ARM32-LABEL: f9: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldr r0, [r0] + +entry: + %tmp1 = load i32, i32* @i5 + ret i32 %tmp1 +} + +define i32* @f10() { +; ARM32-LABEL: f10: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: pop + +entry: + ret i32* @i5 +} + +define i16 @f11() { +; ARM32-LABEL: f11: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldrh r0, [r0] + +entry: + %tmp1 = load i16, i16* @s1 + ret i16 %tmp1 +} + +define i32 @f12() { +; ARM32-LABEL: f12: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldrsh r0, [r0] + +entry: + %tmp1 = load i16, i16* @s1 + %tmp2 = sext i16 %tmp1 to i32 + ret i32 %tmp2 +} + +define i8 @f13() { +; ARM32-LABEL: f13: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldrb r0, [r0] +; ARM32-NEXT: pop + +entry: + %tmp1 = load i8, i8* @b1 + ret i8 %tmp1 +} + +define i32 @f14() { +; ARM32-LABEL: f14: +; ARM32: ldr r0, +; ARM32-NEXT: ldr r1, +; ARM32: add r0, pc, r0 +; ARM32-NEXT: ldr r0, [r1, r0] +; ARM32-NEXT: bl __emutls_get_address(PLT) +; ARM32-NEXT: ldrsb r0, [r0] +; ARM32-NEXT: pop + +entry: + %tmp1 = load i8, i8* @b1 + %tmp2 = sext i8 %tmp1 to i32 + ret i32 %tmp2 +} + +;;;;;;;;;;;;;; 32-bit __emutls_v. and __emutls_t. 
+ +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.i1: +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long __emutls_t.i1 + +; ARM32 .section .rodata, +; ARM32-LABEL: __emutls_t.i1: +; ARM32-NEXT: .long 15 + +; ARM32-NOT: __emutls_v.i2 + +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.i3: +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long __emutls_t.i3 + +; ARM32 .section .rodata, +; ARM32-LABEL: __emutls_t.i3: +; ARM32-NEXT: .long 15 + +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.i4: +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 4 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long __emutls_t.i4 + +; ARM32 .section .rodata, +; ARM32-LABEL: __emutls_t.i4: +; ARM32-NEXT: .long 15 + +; ARM32-NOT: __emutls_v.i5: +; ARM32 .hidden __emutls_v.i5 +; ARM32-NOT: __emutls_v.i5: + +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.s1: +; ARM32-NEXT: .long 2 +; ARM32-NEXT: .long 2 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long __emutls_t.s1 + +; ARM32 .section .rodata, +; ARM32-LABEL: __emutls_t.s1: +; ARM32-NEXT: .short 15 + +; ARM32 .section .data.rel.local, +; ARM32-LABEL: __emutls_v.b1: +; ARM32-NEXT: .long 1 +; ARM32-NEXT: .long 1 +; ARM32-NEXT: .long 0 +; ARM32-NEXT: .long 0 + +; ARM32-NOT: __emutls_t.b1 + +;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t. + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i1: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i1 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i1: +; ARM64-NEXT: .word 15 + +; ARM64-NOT: __emutls_v.i2 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i3: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i3 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i3: +; ARM64-NEXT: .word 15 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i4: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i4 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i4: +; ARM64-NEXT: .word 15 + +; ARM64-NOT: __emutls_v.i5: +; ARM64 .hidden __emutls_v.i5 +; ARM64-NOT: __emutls_v.i5: + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.s1: +; ARM64-NEXT: .xword 2 +; ARM64-NEXT: .xword 2 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.s1 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.s1: +; ARM64-NEXT: .hword 15 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.b1: +; ARM64-NEXT: .xword 1 +; ARM64-NEXT: .xword 1 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword 0 + +; ARM64-NOT: __emutls_t.b1 Index: llvm/trunk/test/CodeGen/ARM/emutls1.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/emutls1.ll +++ llvm/trunk/test/CodeGen/ARM/emutls1.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -emulated-tls -march=arm -mtriple=arm-linux-androideabi \ +; RUN: | FileCheck %s +; RUN: llc < %s -emulated-tls -march=arm -mtriple=arm-linux-androideabi \ +; RUN: -relocation-model=pic | FileCheck %s --check-prefix=PIC + +; Compared with tls1.ll, emulated mode should not use __aeabi_read_tp or __tls_get_addr. 
+ +; CHECK-NOT: _aeabi_read_tp +; CHECK-NOT: _tls_get_addr +; CHECK: __emutls_get_addr +; CHECK-NOT: __aeabi_read_tp +; CHECK-NOT: _tls_get_addr + +; PIC-NOT: _aeabi_read_tp +; PIC-NOT: _tls_get_addr +; PIC: __emutls_get_addr +; PIC-NOT: _aeabi_read_tp +; PIC-NOT: _tls_get_addr + +@i = thread_local global i32 15 ; [#uses=2] + +define i32 @f() { +entry: + %tmp1 = load i32, i32* @i ; [#uses=1] + ret i32 %tmp1 +} + +define i32* @g() { +entry: + ret i32* @i +} Index: llvm/trunk/test/CodeGen/ARM/tls-models.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/tls-models.ll +++ llvm/trunk/test/CodeGen/ARM/tls-models.ll @@ -1,5 +1,11 @@ -; RUN: llc -march=arm -mtriple=arm-linux-gnueabi < %s | FileCheck -check-prefix=CHECK-NONPIC %s -; RUN: llc -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s | FileCheck -check-prefix=CHECK-PIC %s +; RUN: llc -march=arm -mtriple=arm-linux-gnueabi < %s \ +; RUN: | FileCheck -check-prefix=CHECK-NONPIC -check-prefix=COMMON %s +; RUN: llc -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s \ +; RUN: | FileCheck -check-prefix=CHECK-PIC -check-prefix=COMMON %s +; RUN: llc -emulated-tls -march=arm -mtriple=arm-linux-gnueabi < %s \ +; RUN: | FileCheck -check-prefix=EMUNONPIC -check-prefix=EMU -check-prefix=COMMON %s +; RUN: llc -emulated-tls -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s \ +; RUN: | FileCheck -check-prefix=EMUPIC -check-prefix=EMU -check-prefix=COMMON %s @external_gd = external thread_local global i32 @@ -20,23 +26,23 @@ entry: ret i32* @external_gd + ; COMMON-LABEL: f1: ; Non-PIC code can use initial-exec, PIC code has to use general dynamic. - ; CHECK-NONPIC-LABEL: f1: ; CHECK-NONPIC: external_gd(GOTTPOFF) - ; CHECK-PIC-LABEL: f1: ; CHECK-PIC: external_gd(TLSGD) + ; EMU: __emutls_get_address } define i32* @f2() { entry: ret i32* @internal_gd + ; COMMON-LABEL: f2: ; Non-PIC code can use local exec, PIC code can use local dynamic, ; but that is not implemented, so falls back to general dynamic. - ; CHECK-NONPIC-LABEL: f2: ; CHECK-NONPIC: internal_gd(TPOFF) - ; CHECK-PIC-LABEL: f2: ; CHECK-PIC: internal_gd(TLSGD) + ; EMU: __emutls_get_address } @@ -46,24 +52,24 @@ entry: ret i32* @external_ld + ; COMMON-LABEL: f3: ; Non-PIC code can use initial exec, PIC should use local dynamic, ; but that is not implemented, so falls back to general dynamic. - ; CHECK-NONPIC-LABEL: f3: ; CHECK-NONPIC: external_ld(GOTTPOFF) - ; CHECK-PIC-LABEL: f3: ; CHECK-PIC: external_ld(TLSGD) + ; EMU: __emutls_get_address } define i32* @f4() { entry: ret i32* @internal_ld + ; COMMON-LABEL: f4: ; Non-PIC code can use local exec, PIC code can use local dynamic, ; but that is not implemented, so it falls back to general dynamic. - ; CHECK-NONPIC-LABEL: f4: ; CHECK-NONPIC: internal_ld(TPOFF) - ; CHECK-PIC-LABEL: f4: ; CHECK-PIC: internal_ld(TLSGD) + ; EMU: __emutls_get_address } @@ -73,22 +79,22 @@ entry: ret i32* @external_ie + ; COMMON-LABEL: f5: ; Non-PIC and PIC code will use initial exec as specified. - ; CHECK-NONPIC-LABEL: f5: ; CHECK-NONPIC: external_ie(GOTTPOFF) - ; CHECK-PIC-LABEL: f5: ; CHECK-PIC: external_ie(GOTTPOFF) + ; EMU: __emutls_get_address } define i32* @f6() { entry: ret i32* @internal_ie + ; COMMON-LABEL: f6: ; Non-PIC code can use local exec, PIC code use initial exec as specified. 
- ; CHECK-NONPIC-LABEL: f6: ; CHECK-NONPIC: internal_ie(TPOFF) - ; CHECK-PIC-LABEL: f6: ; CHECK-PIC: internal_ie(GOTTPOFF) + ; EMU: __emutls_get_address } @@ -98,20 +104,52 @@ entry: ret i32* @external_le + ; COMMON-LABEL: f7: ; Non-PIC and PIC code will use local exec as specified. - ; CHECK-NONPIC-LABEL: f7: ; CHECK-NONPIC: external_le(TPOFF) - ; CHECK-PIC-LABEL: f7: ; CHECK-PIC: external_le(TPOFF) + ; EMU: __emutls_get_address } define i32* @f8() { entry: ret i32* @internal_le + ; COMMON-LABEL: f8: ; Non-PIC and PIC code will use local exec as specified. - ; CHECK-NONPIC-LABEL: f8: ; CHECK-NONPIC: internal_le(TPOFF) - ; CHECK-PIC-LABEL: f8: ; CHECK-PIC: internal_le(TPOFF) + ; EMU: __emutls_get_address } + + +; ----- emulated specified ----- + +; External declaration has no initializer. +; Internal definition has initializer. + +; EMU-NOT: __emutls_t.external_gd +; EMU-NOT: __emutls_v.external_gd +; EMU: .align 2 +; EMU-LABEL: __emutls_v.internal_gd: +; EMU-NEXT: .long 4 +; EMU-NEXT: .long 4 +; EMU-NEXT: .long 0 +; EMU-NEXT: .long __emutls_t.internal_gd +; EMU-LABEL: __emutls_t.internal_gd: +; EMU-NEXT: .long 42 +; EMU-NOT: __emutls_t.external_gd + +; __emutls_t and __emutls_v are the same for PIC and non-PIC modes. + +; EMU-NOT: __emutls_t.external_gd +; EMU-NOT: __emutls_v.external_gd +; EMU: .align 2 +; EMU-LABEL: __emutls_v.internal_le: +; EMU-NEXT: .long 4 +; EMU-NEXT: .long 4 +; EMU-NEXT: .long 0 +; EMU-NEXT: .long __emutls_t.internal_le +; EMU-LABEL: __emutls_t.internal_le: +; EMU-NEXT: .long 42 +; EMU-NOT: __emutls_t.external_le Index: llvm/trunk/test/CodeGen/ARM/tls3.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/tls3.ll +++ llvm/trunk/test/CodeGen/ARM/tls3.ll @@ -1,11 +1,34 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ ; RUN: grep "tbss" +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=NOEMU +; RUN: llc < %s -emulated-tls -march=arm -mtriple=arm-linux-gnueabi | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=EMU %struct.anon = type { i32, i32 } -@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1] +@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1] define i32 @main() { entry: - %tmp2 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @teste, i32 0, i32 0), align 8 ; [#uses=1] - ret i32 %tmp2 + %tmp2 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @teste, i32 0, i32 0), align 8 ; [#uses=1] + ret i32 %tmp2 } + +; CHECK-LABEL: main: +; NOEMU-NOT: __emutls_get_address + +; NOEMU: .section .tbss +; NOEMU-LABEL: teste: +; NOEMU-NEXT: .zero 8 + +; CHECK-NOT: __emutls_t.teste + +; EMU: .align 2 +; EMU-LABEL: __emutls_v.teste: +; EMU-NEXT: .long 8 +; EMU-NEXT: .long 4 +; EMU-NEXT: .long 0 +; EMU-NEXT: .long 0 + +; CHECK-NOT: teste: +; CHECK-NOT: __emutls_t.teste Index: llvm/trunk/test/CodeGen/Generic/emutls.ll =================================================================== --- llvm/trunk/test/CodeGen/Generic/emutls.ll +++ llvm/trunk/test/CodeGen/Generic/emutls.ll @@ -0,0 +1,298 @@ +; RUN: llc < %s -emulated-tls -mtriple=arm-linux-android -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=ARM_32 %s +; RUN: llc < %s -emulated-tls -mtriple=arm-linux-androidabi -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=ARM_32 %s +; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -relocation-model=pic \ +; RUN: | FileCheck 
-check-prefix=ARM_64 %s +; RUN: llc < %s -emulated-tls -mtriple=arm-linux-androidabi -relocation-model=pic -O3 \ +; RUN: | FileCheck -check-prefix=ARM_32 %s +; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -relocation-model=pic -O3 \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -emulated-tls -mtriple=arm-linux-androidabi -O3 \ +; RUN: | FileCheck -check-prefix=ARM_32 %s +; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -O3 \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -emulated-tls -mtriple=i686-linux-android -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=X86_32 %s +; RUN: llc < %s -emulated-tls -mtriple=x86_64-linux-android -march=x86 -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=X86_32 %s +; RUN: llc < %s -emulated-tls -mtriple=x86_64-linux-android -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=X86_64 %s +; RUN: llc < %s -emulated-tls -mtriple=mipsel-linux-android -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=MIPS_32 %s +; RUN: llc < %s -emulated-tls -mtriple=mips64el-linux-android -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=MIPS_64 %s +; RUN: llc < %s -emulated-tls -march=ppc64 -relocation-model=pic \ +; RUN: | FileCheck %s +; RUN: llc < %s -emulated-tls -march=ppc32 -relocation-model=pic \ +; RUN: | FileCheck %s +; RUN: llc < %s -emulated-tls -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic \ +; RUN: | FileCheck %s + +; Make sure that TLS symbols are emitted in expected order. + +@external_x = external thread_local global i32, align 8 +@external_y = thread_local global i8 7, align 2 +@internal_y = internal thread_local global i64 9, align 16 + +define i32* @get_external_x() { +entry: + ret i32* @external_x +} + +define i8* @get_external_y() { +entry: + ret i8* @external_y +} + +define i64* @get_internal_y() { +entry: + ret i64* @internal_y +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; targets independent mode +; CHECK-LABEL: get_external_x: +; CHECK-NOT: _tls_get_address +; CHECK: __emutls_get_address +; CHECK-LABEL: get_external_y: +; CHECK: __emutls_get_address +; CHECK-NOT: _tls_get_address +; CHECK-LABEL: get_internal_y: + +; CHECK-NOT: __emutls_t.external_x: +; CHECK-NOT: __emutls_v.external_x: + +; CHECK-LABEL: __emutls_v.external_y: +; CHECK-LABEL: __emutls_t.external_y: +; CHECK: __emutls_t.external_y + +; CHECK-LABEL: __emutls_v.internal_y: +; CHECK-LABEL: __emutls_t.internal_y: +; CHECK: __emutls_t.internal_y + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 32-bit mode +; ARM_32-LABEL: get_external_x: +; X86_32-LABEL: get_external_x: +; MIPS-LABEL: get_external_x: + +; ARM_32: bl __emutls_get_address +; ARM_32: .long __emutls_v.external_x + +; X86_32: movl __emutls_v.external_x +; X86_32: calll __emutls_get_address + +; ARM_32-LABEL: get_external_y: +; X86_32-LABEL: get_external_y: +; MIPS_32-LABEL: get_external_y: + +; ARM_32: bl __emutls_get_address +; ARM_32: .long __emutls_v.external_y + +; X86_32: movl __emutls_v.external_y +; X86_32: calll __emutls_get_address + +; ARM_32-LABEL: get_internal_y: +; X86_32-LABEL: get_internal_y: +; MIPS_32-LABEL: get_internal_y: + +; ARM_32: bl __emutls_get_address +; ARM_32: .long __emutls_v.internal_y + +; X86_32: movl __emutls_v.internal_y +; X86_32: calll __emutls_get_address + +; MIPS_32: lw {{.+}}(__emutls_v.internal_y +; MIPS_32: lw {{.+}}call16(__emutls_get_address + +; ARM_32-NOT: __emutls_t.external_x +; X86_32-NOT: __emutls_t.external_x +; MIPS_32-NOT: __emutls_t.external_x + +; ARM_32-NOT: 
__emutls_v.external_x: +; X86_32-NOT: __emutls_v.external_x: +; MIPS_32-NOT: __emutls_v.external_x: + +; ARM_32: .section .data.rel.local +; X86_32: .section .data.rel.local +; MIPS_32: .section .data.rel.local + +; ARM_32: .align 2 +; X86_32: .align 4 +; MIPS_32: .align 2 + +; ARM_32-LABEL: __emutls_v.external_y: +; X86_32-LABEL: __emutls_v.external_y: +; MIPS_32-LABEL: __emutls_v.external_y: + +; ARM_32-NEXT: .long 1 +; ARM_32-NEXT: .long 2 +; ARM_32-NEXT: .long 0 +; ARM_32-NEXT: .long __emutls_t.external_y + +; X86_32-NEXT: .long 1 +; X86_32-NEXT: .long 2 +; X86_32-NEXT: .long 0 +; X86_32-NEXT: .long __emutls_t.external_y + +; ARM_32: .section .rodata, +; X86_32: .section .rodata, +; MIPS_32: .section .rodata, + +; ARM_32-LABEL: __emutls_t.external_y: +; X86_32-LABEL: __emutls_t.external_y: +; MIPS_32-LABEL: __emutls_t.external_y: + +; ARM_32-NEXT: .byte 7 +; X86_32-NEXT: .byte 7 +; MIPS_32-NEXT: .byte 7 + +; ARM_32: .section .data.rel.local +; X86_32: .section .data.rel.local +; MIPS_32: .section .data.rel.local + +; ARM_32: .align 2 +; X86_32: .align 4 +; MIPS_32: .align 2 + +; ARM_32-LABEL: __emutls_v.internal_y: +; X86_32-LABEL: __emutls_v.internal_y: +; MIPS_32-LABEL: __emutls_v.internal_y: + +; ARM_32-NEXT: .long 8 +; ARM_32-NEXT: .long 16 +; ARM_32-NEXT: .long 0 +; ARM_32-NEXT: .long __emutls_t.internal_y + +; X86_32-NEXT: .long 8 +; X86_32-NEXT: .long 16 +; X86_32-NEXT: .long 0 +; X86_32-NEXT: .long __emutls_t.internal_y + +; MIPS_32-NEXT: .4byte 8 +; MIPS_32-NEXT: .4byte 16 +; MIPS_32-NEXT: .4byte 0 +; MIPS_32-NEXT: .4byte __emutls_t.internal_y + +; ARM_32-LABEL: __emutls_t.internal_y: +; X86_32-LABEL: __emutls_t.internal_y: +; MIPS_32-LABEL: __emutls_t.internal_y: + +; ARM_32-NEXT: .long 9 +; ARM_32-NEXT: .long 0 +; X86_32-NEXT: .quad 9 +; MIPS_32-NEXT: .8byte 9 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 64-bit mode +; X86_64-LABEL: get_external_x: +; ARM_64-LABEL: get_external_x: +; MIPS_64-LABEL: get_external_x: + +; X86_64: __emutls_v.external_x +; X86_64: __emutls_get_address + +; ARM_64: __emutls_v.external_x +; ARM_64: __emutls_get_address + +; X86_64-LABEL: get_external_y: +; ARM_64-LABEL: get_external_y: +; MIPS_64-LABEL: get_external_y: + +; X86_64: __emutls_v.external_y +; X86_64: __emutls_get_address + +; ARM_64: __emutls_v.external_y +; ARM_64: __emutls_get_address + +; X86_64-LABEL: get_internal_y: +; ARM_64-LABEL: get_internal_y: +; MIPS_64-LABEL: get_internal_y: + +; X86_64: __emutls_v.internal_y +; X86_64: __emutls_get_address + +; ARM_64: __emutls_v.internal_y +; ARM_64: __emutls_get_address + +; MIPS_64: ld {{.+}}(__emutls_v.internal_y +; MIPS_64: ld {{.+}}call16(__emutls_get_address + +; ARM_64-NOT: __emutls_t.external_x +; X86_64-NOT: __emutls_t.external_x +; MIPS_64-NOT: __emutls_t.external_x + +; X86_64-NOT: __emutls_v.external_x: +; ARM_64-NOT: __emutls_v.external_x: +; MIPS_64-NOT: __emutls_v.external_x: + +; X86_64: .align 8 +; ARM_64: .align 3 + +; X86_64-LABEL: __emutls_v.external_y: +; ARM_64-LABEL: __emutls_v.external_y: +; MIPS_64-LABEL: __emutls_v.external_y: + +; X86_64-NEXT: .quad 1 +; X86_64-NEXT: .quad 2 +; X86_64-NEXT: .quad 0 +; X86_64-NEXT: .quad __emutls_t.external_y + +; ARM_64-NEXT: .xword 1 +; ARM_64-NEXT: .xword 2 +; ARM_64-NEXT: .xword 0 +; ARM_64-NEXT: .xword __emutls_t.external_y + +; X86_64-NOT: __emutls_v.external_x: +; ARM_64-NOT: __emutls_v.external_x: +; MIPS_64-NOT: __emutls_v.external_x: + +; ARM_64: .section .rodata, +; X86_64: .section .rodata, +; MIPS_64: .section .rodata, + +; X86_64-LABEL: __emutls_t.external_y: 
+; ARM_64-LABEL: __emutls_t.external_y: +; MIPS_64-LABEL: __emutls_t.external_y: + +; X86_64-NEXT: .byte 7 +; ARM_64-NEXT: .byte 7 +; MIPS_64-NEXT: .byte 7 + +; ARM_64: .section .data.rel.local +; X86_64: .section .data.rel.local +; MIPS_64: .section .data.rel.local + +; X86_64: .align 8 +; ARM_64: .align 3 +; MIPS_64: .align 3 + +; X86_64-LABEL: __emutls_v.internal_y: +; ARM_64-LABEL: __emutls_v.internal_y: +; MIPS_64-LABEL: __emutls_v.internal_y: + +; X86_64-NEXT: .quad 8 +; X86_64-NEXT: .quad 16 +; X86_64-NEXT: .quad 0 +; X86_64-NEXT: .quad __emutls_t.internal_y + +; ARM_64-NEXT: .xword 8 +; ARM_64-NEXT: .xword 16 +; ARM_64-NEXT: .xword 0 +; ARM_64-NEXT: .xword __emutls_t.internal_y + +; MIPS_64-NEXT: .8byte 8 +; MIPS_64-NEXT: .8byte 16 +; MIPS_64-NEXT: .8byte 0 +; MIPS_64-NEXT: .8byte __emutls_t.internal_y + +; ARM_64: .section .rodata, +; X86_64: .section .rodata, +; MIPS_64: .section .rodata, + +; X86_64-LABEL: __emutls_t.internal_y: +; ARM_64-LABEL: __emutls_t.internal_y: +; MIPS_64-LABEL: __emutls_t.internal_y: + +; X86_64-NEXT: .quad 9 +; ARM_64-NEXT: .xword 9 +; MIPS_64-NEXT: .8byte 9 Index: llvm/trunk/test/CodeGen/X86/emutls-pic.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/emutls-pic.ll +++ llvm/trunk/test/CodeGen/X86/emutls-pic.ll @@ -0,0 +1,168 @@ +; RUN: llc < %s -emulated-tls -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s +; RUN: llc < %s -emulated-tls -march=x86 -mtriple=i386-linux-android -relocation-model=pic | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck -check-prefix=X64 %s + +; Use my_emutls_get_address like __emutls_get_address. 
+@my_emutls_v_xyz = external global i8*, align 4 +declare i8* @my_emutls_get_address(i8*) + +define i32 @my_get_xyz() { +; X32-LABEL: my_get_xyz: +; X32: movl my_emutls_v_xyz@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll my_emutls_get_address@PLT +; X64-LABEL: my_get_xyz: +; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi +; X64-NEXT: callq my_emutls_get_address@PLT +; X64-NEXT: movl (%rax), %eax + +entry: + %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*)) + %0 = bitcast i8* %call to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +@i = thread_local global i32 15 +@j = internal thread_local global i32 42 +@k = internal thread_local global i32 0, align 8 + +define i32 @f1() { +entry: + %tmp1 = load i32, i32* @i + ret i32 %tmp1 +} + +; X32-LABEL: f1: +; X32: movl __emutls_v.i@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X64-LABEL: f1: +; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT +; X64-NEXT: movl (%rax), %eax + +@i2 = external thread_local global i32 + +define i32* @f2() { +entry: + ret i32* @i +} + +; X32-LABEL: f2: +; X64-LABEL: f2: + + +define i32 @f3() { +entry: + %tmp1 = load i32, i32* @i ; [#uses=1] + ret i32 %tmp1 +} + +; X32-LABEL: f3: +; X64-LABEL: f3: + + +define i32* @f4() nounwind { +entry: + ret i32* @i +} + +; X32-LABEL: f4: +; X64-LABEL: f4: + + +define i32 @f5() nounwind { +entry: + %0 = load i32, i32* @j, align 4 + %1 = load i32, i32* @k, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +; X32-LABEL: f5: +; X32: movl __emutls_v.j@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X32-NEXT: movl (%eax), %esi +; X32-NEXT: movl __emutls_v.k@GOT(%ebx), %eax +; X32-NEXT: movl %eax, (%esp) +; X32-NEXT: calll __emutls_get_address@PLT +; X32-NEXT: addl (%eax), %esi +; X32-NEXT: movl %esi, %eax + +; X64-LABEL: f5: +; X64: movq __emutls_v.j@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT +; X64-NEXT: movl (%rax), %ebx +; X64-NEXT: movq __emutls_v.k@GOTPCREL(%rip), %rdi +; X64-NEXT: callq __emutls_get_address@PLT +; X64-NEXT: addl (%rax), %ebx +; X64-NEXT: movl %ebx, %eax + +;;;;; 32-bit targets + +; X32: .section .data.rel.local, +; X32-LABEL: __emutls_v.i: +; X32-NEXT: .long 4 +; X32-NEXT: .long 4 +; X32-NEXT: .long 0 +; X32-NEXT: .long __emutls_t.i + +; X32: .section .rodata, +; X32-LABEL: __emutls_t.i: +; X32-NEXT: .long 15 + +; X32: .section .data.rel.local, +; X32-LABEL: __emutls_v.j: +; X32-NEXT: .long 4 +; X32-NEXT: .long 4 +; X32-NEXT: .long 0 +; X32-NEXT: .long __emutls_t.j + +; X32: .section .rodata, +; X32-LABEL: __emutls_t.j: +; X32-NEXT: .long 42 + +; X32: .data +; X32-LABEL: __emutls_v.k: +; X32-NEXT: .long 4 +; X32-NEXT: .long 8 +; X32-NEXT: .long 0 +; X32-NEXT: .long 0 + +; X32-NOT: __emutls_t.k: + +;;;;; 64-bit targets + +; X64: .section .data.rel.local, +; X64-LABEL: __emutls_v.i: +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 0 +; X64-NEXT: .quad __emutls_t.i + +; X64: .section .rodata, +; X64-LABEL: __emutls_t.i: +; X64-NEXT: .long 15 + +; X64: .section .data.rel.local, +; X64-LABEL: __emutls_v.j: +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 0 +; X64-NEXT: .quad __emutls_t.j + +; X64: .section .rodata, +; X64-LABEL: __emutls_t.j: +; X64-NEXT: .long 42 + +; X64: .data +; X64-LABEL: __emutls_v.k: +; X64-NEXT: .quad 4 +; X64-NEXT: .quad 8 +; X64-NEXT: .quad 0 +; X64-NEXT: .quad 0 + +; X64-NOT: __emutls_t.k: Index: 
llvm/trunk/test/CodeGen/X86/emutls-pie.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/emutls-pie.ll
+++ llvm/trunk/test/CodeGen/X86/emutls-pie.ll
@@ -0,0 +1,131 @@
+; RUN: llc < %s -emulated-tls -march=x86 -mcpu=generic -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mcpu=generic -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X64 %s
+; RUN: llc < %s -emulated-tls -march=x86 -mcpu=generic -mtriple=i386-linux-android -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mcpu=generic -mtriple=x86_64-linux-android -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X64 %s
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+; X32-LABEL: my_get_xyz:
+; X32: movl my_emutls_v_xyz@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll my_emutls_get_address@PLT
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
+; X64-LABEL: my_get_xyz:
+; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq my_emutls_get_address@PLT
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rdx
+; X64-NEXT: retq
+
+entry:
+ %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+ %0 = bitcast i8* %call to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+@i = thread_local global i32 15
+@i2 = external thread_local global i32
+
+define i32 @f1() {
+; X32-LABEL: f1:
+; X32: movl __emutls_v.i@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
+; X64-LABEL: f1:
+; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rdx
+; X64-NEXT: retq
+
+entry:
+ %tmp1 = load i32, i32* @i
+ ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32-LABEL: f2:
+; X32: movl __emutls_v.i@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X64-LABEL: f2:
+; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+
+entry:
+ ret i32* @i
+}
+
+define i32 @f3() {
+; X32-LABEL: f3:
+; X32: movl __emutls_v.i2@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X64-LABEL: f3:
+; X64: movq __emutls_v.i2@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+
+entry:
+ %tmp1 = load i32, i32* @i2
+ ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32-LABEL: f4:
+; X32: movl __emutls_v.i2@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X64-LABEL: f4:
+; X64: movq __emutls_v.i2@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+
+entry:
+ ret i32* @i2
+}
+
+;;;;; 32-bit targets
+
+; X32: .section .data.rel.local,
+; X32-LABEL: __emutls_v.i:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i
+
+; X32: .section .rodata,
+; X32-LABEL: __emutls_t.i:
+; X32-NEXT: .long 15
+
+; X32-NOT: __emutls_v.i2
+; X32-NOT: __emutls_t.i2
+
+;;;;; 64-bit targets
+
+; X64: .section .data.rel.local,
+; X64-LABEL: __emutls_v.i:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i
+
+; X64: .section .rodata,
+; X64-LABEL: __emutls_t.i:
+; X64-NEXT: .long 15
+
+; X64-NOT: __emutls_v.i2
+; X64-NOT: __emutls_t.i2
Index: llvm/trunk/test/CodeGen/X86/emutls.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/emutls.ll
+++ llvm/trunk/test/CodeGen/X86/emutls.ll
@@ -0,0 +1,347 @@
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64 %s
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=x86-linux-android | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-android | FileCheck -check-prefix=X64 %s
+
+; Copied from tls.ll; emulated TLS model is not implemented
+; for *-pc-win32 and *-pc-windows targets yet.
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+; X32-LABEL: my_get_xyz:
+; X32: movl $my_emutls_v_xyz, (%esp)
+; X32-NEXT: calll my_emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+; X64-LABEL: my_get_xyz:
+; X64: movl $my_emutls_v_xyz, %edi
+; X64-NEXT: callq my_emutls_get_address
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rdx
+; X64-NEXT: retq
+
+entry:
+ %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+ %0 = bitcast i8* %call to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+@i1 = thread_local global i32 15
+@i2 = external thread_local global i32
+@i3 = internal thread_local global i32 15
+@i4 = hidden thread_local global i32 15
+@i5 = external hidden thread_local global i32
+@s1 = thread_local global i16 15
+@b1 = thread_local global i8 0
+
+define i32 @f1() {
+; X32-LABEL: f1:
+; X32: movl $__emutls_v.i1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+; X64-LABEL: f1:
+; X64: movl $__emutls_v.i1, %edi
+; X64-NEXT: callq __emutls_get_address
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rdx
+; X64-NEXT: retq
+
+entry:
+ %tmp1 = load i32, i32* @i1
+ ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32-LABEL: f2:
+; X32: movl $__emutls_v.i1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+; X64-LABEL: f2:
+; X64: movl $__emutls_v.i1, %edi
+; X64-NEXT: callq __emutls_get_address
+; X64-NEXT: popq %rdx
+; X64-NEXT: retq
+
+entry:
+ ret i32* @i1
+}
+
+define i32 @f3() nounwind {
+; X32-LABEL: f3:
+; X32: movl $__emutls_v.i2, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i32, i32* @i2
+ ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32-LABEL: f4:
+; X32: movl $__emutls_v.i2, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ ret i32* @i2
+}
+
+define i32 @f5() nounwind {
+; X32-LABEL: f5:
+; X32: movl $__emutls_v.i3, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i32, i32* @i3
+ ret i32 %tmp1
+}
+
+define i32* @f6() {
+; X32-LABEL: f6:
+; X32: movl $__emutls_v.i3, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ ret i32* @i3
+}
+
+define i32 @f7() {
+; X32-LABEL: f7:
+; X32: movl $__emutls_v.i4, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i32, i32* @i4
+ ret i32 %tmp1
+}
+
+define i32* @f8() {
+; X32-LABEL: f8:
+; X32: movl $__emutls_v.i4, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ ret i32* @i4
+}
+
+define i32 @f9() {
+; X32-LABEL: f9:
+; X32: movl $__emutls_v.i5, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i32, i32* @i5
+ ret i32 %tmp1
+}
+
+define i32* @f10() {
+; X32-LABEL: f10:
+; X32: movl $__emutls_v.i5, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ ret i32* @i5
+}
+
+define i16 @f11() {
+; X32-LABEL: f11:
+; X32: movl $__emutls_v.s1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movzwl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i16, i16* @s1
+ ret i16 %tmp1
+}
+
+define i32 @f12() {
+; X32-LABEL: f12:
+; X32: movl $__emutls_v.s1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movswl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i16, i16* @s1
+ %tmp2 = sext i16 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i8 @f13() {
+; X32-LABEL: f13:
+; X32: movl $__emutls_v.b1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movb (%eax), %al
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i8, i8* @b1
+ ret i8 %tmp1
+}
+
+define i32 @f14() {
+; X32-LABEL: f14:
+; X32: movl $__emutls_v.b1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movsbl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i8, i8* @b1
+ %tmp2 = sext i8 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+;;;;;;;;;;;;;; 32-bit __emutls_v. and __emutls_t.
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.i1:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i1
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.i1:
+; X32-NEXT: .long 15
+
+; X32-NOT: __emutls_v.i2
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.i3:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i3
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.i3:
+; X32-NEXT: .long 15
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.i4:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i4
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.i4:
+; X32-NEXT: .long 15
+
+; X32-NOT: __emutls_v.i5:
+; X32 .hidden __emutls_v.i5
+; X32-NOT: __emutls_v.i5:
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.s1:
+; X32-NEXT: .long 2
+; X32-NEXT: .long 2
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.s1
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.s1:
+; X32-NEXT: .short 15
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.b1:
+; X32-NEXT: .long 1
+; X32-NEXT: .long 1
+; X32-NEXT: .long 0
+; X32-NEXT: .long 0
+
+; X32-NOT: __emutls_t.b1
+
+;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t.
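+; For reference, each __emutls_v.<name> record checked in this file follows
+; the GCC-compatible emutls control layout, roughly (a sketch; the field
+; names below are illustrative, not taken from this patch):
+;
+;   struct __emutls_control {   // one pointer-sized word per field
+;     size_t size;              // byte size of the TLS variable
+;     size_t align;             // alignment of the TLS variable
+;     uintptr_t index;          // 0 until __emutls_get_address assigns a slot
+;     void *templ;              // __emutls_t.<name>, or 0 if zero-initialized
+;   };
+;
+; Fields are emitted as .long on 32-bit targets and as .quad on 64-bit
+; targets, which is what the X32 block above and the X64 block below check.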
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.i1:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i1
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.i1:
+; X64-NEXT: .long 15
+
+; X64-NOT: __emutls_v.i2
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.i3:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i3
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.i3:
+; X64-NEXT: .long 15
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.i4:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i4
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.i4:
+; X64-NEXT: .long 15
+
+; X64-NOT: __emutls_v.i5:
+; X64 .hidden __emutls_v.i5
+; X64-NOT: __emutls_v.i5:
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.s1:
+; X64-NEXT: .quad 2
+; X64-NEXT: .quad 2
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.s1
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.s1:
+; X64-NEXT: .short 15
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.b1:
+; X64-NEXT: .quad 1
+; X64-NEXT: .quad 1
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad 0
+
+; X64-NOT: __emutls_t.b1
Index: llvm/trunk/test/CodeGen/X86/fast-isel-emutls.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-emutls.ll
+++ llvm/trunk/test/CodeGen/X86/fast-isel-emutls.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -emulated-tls -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | FileCheck %s
+; PR3654
+
+@v = thread_local global i32 0
+define i32 @f() nounwind {
+entry:
+ %t = load i32, i32* @v
+ %s = add i32 %t, 1
+ ret i32 %s
+}
+
+; CHECK-LABEL: f:
+; CHECK: movl __emutls_v.v@GOT(%ebx), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: calll __emutls_get_address@PLT
+; CHECK-NEXT: movl (%eax), %eax
+
+@alias = internal alias i32* @v
+define i32 @f_alias() nounwind {
+entry:
+ %t = load i32, i32* @v
+ %s = add i32 %t, 1
+ ret i32 %s
+}
+
+; CHECK-LABEL: f_alias:
+; CHECK: movl __emutls_v.v@GOT(%ebx), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: calll __emutls_get_address@PLT
+; CHECK-NEXT: movl (%eax), %eax
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+entry:
+ %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+ %0 = bitcast i8* %call to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+; CHECK-LABEL: my_get_xyz:
+; CHECK: movl my_emutls_v_xyz@GOT(%ebx), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: calll my_emutls_get_address@PLT
+; CHECK-NEXT: movl (%eax), %eax
Index: llvm/trunk/test/CodeGen/X86/tls-android-negative.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/tls-android-negative.ll
+++ llvm/trunk/test/CodeGen/X86/tls-android-negative.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck %s
+
+; Make sure that some symbols are not emitted in the emulated TLS model.
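+; The __emutls_v.<name> control variable and the __emutls_t.<name> template
+; for an external TLS variable belong to the module that defines it, and a
+; zero-initialized TLS variable needs no __emutls_t.<name> template because
+; the runtime zero-fills its storage (see the CHECK-NOT lines below).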
+
+@external_x = external thread_local global i32
+@external_y = thread_local global i32 7
+@internal_y = internal thread_local global i32 9
+@internal_y0 = internal thread_local global i32 0
+
+define i32* @get_external_x() {
+entry:
+ ret i32* @external_x
+}
+
+define i32* @get_external_y() {
+entry:
+ ret i32* @external_y
+}
+
+define i32* @get_internal_y() {
+entry:
+ ret i32* @internal_y
+}
+
+define i32* @get_internal_y0() {
+entry:
+ ret i32* @internal_y0
+}
+
+; no direct access to emulated TLS variables.
+; no definition of emulated TLS variables.
+; no initializer for external TLS variables, __emutls_t.external_x
+; no initializer for 0-initialized TLS variables, __emutls_t.internal_y0
+; not global linkage for __emutls_t.external_y
+
+; CHECK-NOT: external_x@TLS
+; CHECK-NOT: external_y@TLS
+; CHECK-NOT: internal_y@TLS
+; CHECK-NOT: .size external_x
+; CHECK-NOT: .size external_y
+; CHECK-NOT: .size internal_y
+; CHECK-NOT: .size internal_y0
+; CHECK-NOT: __emutls_v.external_x:
+; CHECK-NOT: __emutls_t.external_x:
+; CHECK-NOT: __emutls_t.internal_y0:
+; CHECK-NOT: global __emutls_t.external_y
+; CHECK-NOT: global __emutls_v.internal_y
+; CHECK-NOT: global __emutls_v.internal_y0
+
+; CHECK: __emutls_t.external_y
+
+; CHECK-NOT: external_x@TLS
+; CHECK-NOT: external_y@TLS
+; CHECK-NOT: internal_y@TLS
+; CHECK-NOT: .size external_x
+; CHECK-NOT: .size external_y
+; CHECK-NOT: .size internal_y
+; CHECK-NOT: .size internal_y0
+; CHECK-NOT: __emutls_v.external_x:
+; CHECK-NOT: __emutls_t.external_x:
+; CHECK-NOT: __emutls_t.internal_y0:
+; CHECK-NOT: global __emutls_t.external_y
+; CHECK-NOT: global __emutls_v.internal_y
+; CHECK-NOT: global __emutls_v.internal_y0
Index: llvm/trunk/test/CodeGen/X86/tls-android.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/tls-android.ll
+++ llvm/trunk/test/CodeGen/X86/tls-android.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck -check-prefix=X64 %s
+
+; Make sure that TLS symbols are emitted in the expected order.
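+; As the checks below verify: the accessor functions come first, then each
+; emitted __emutls_v.<name> control variable is followed directly by its
+; __emutls_t.<name> initial value, and no __emutls_v.external_x is emitted
+; because external_x is defined in another module.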
+
+@external_x = external thread_local global i32
+@external_y = thread_local global i32 7
+@internal_y = internal thread_local global i32 9
+
+define i32* @get_external_x() {
+entry:
+ ret i32* @external_x
+}
+
+define i32* @get_external_y() {
+entry:
+ ret i32* @external_y
+}
+
+define i32* @get_internal_y() {
+entry:
+ ret i32* @internal_y
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 32-bit mode
+; CHECK-LABEL: get_external_x:
+; CHECK: __emutls_v.external_x
+; CHECK: __emutls_get_address
+
+; CHECK-LABEL: get_external_y:
+; CHECK: __emutls_v.external_y
+; CHECK: __emutls_get_address
+
+; CHECK-LABEL: get_internal_y:
+; CHECK: __emutls_v.internal_y
+; CHECK: __emutls_get_address
+
+; CHECK-NOT: __emutls_v.external_x:
+
+; CHECK: .align 4
+; CHECK-LABEL: __emutls_v.external_y:
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long __emutls_t.external_y
+; CHECK-LABEL: __emutls_t.external_y:
+; CHECK-NEXT: .long 7
+
+; CHECK: .align 4
+; CHECK-LABEL: __emutls_v.internal_y:
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long __emutls_t.internal_y
+; CHECK-LABEL: __emutls_t.internal_y:
+; CHECK-NEXT: .long 9
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 64-bit mode
+; X64-LABEL: get_external_x:
+; X64: __emutls_v.external_x
+; X64: __emutls_get_address
+
+; X64-LABEL: get_external_y:
+; X64: __emutls_v.external_y
+; X64: __emutls_get_address
+
+; X64-LABEL: get_internal_y:
+; X64: __emutls_v.internal_y
+; X64: __emutls_get_address
+
+; X64-NOT: __emutls_v.external_x:
+
+; X64: .align 8
+; X64-LABEL: __emutls_v.external_y:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.external_y
+; X64-LABEL: __emutls_t.external_y:
+; X64-NEXT: .long 7
+
+; X64: .align 8
+; X64-LABEL: __emutls_v.internal_y:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.internal_y
+; X64-LABEL: __emutls_t.internal_y:
+; X64-NEXT: .long 9
Index: llvm/trunk/test/CodeGen/X86/tls-models.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/tls-models.ll
+++ llvm/trunk/test/CodeGen/X86/tls-models.ll
@@ -18,6 +18,8 @@
 @external_le = external thread_local(localexec) global i32
 @internal_le = internal thread_local(localexec) global i32 42
+; See test cases for emulated model in emutls.ll, emutls-pic.ll and emutls-pie.ll.
+
 ; ----- no model specified -----
 define i32* @f1() {
Index: llvm/trunk/test/DebugInfo/ARM/tls.ll
===================================================================
--- llvm/trunk/test/DebugInfo/ARM/tls.ll
+++ llvm/trunk/test/DebugInfo/ARM/tls.ll
@@ -1,5 +1,8 @@
-; RUN: llc -O0 -filetype=asm -mtriple=armv7-linux-gnuehabi < %s | FileCheck %s
-;
+; RUN: llc -O0 -filetype=asm -mtriple=armv7-linux-gnuehabi < %s \
+; RUN: | FileCheck %s --check-prefix=CHECK
+; RUN: llc -O0 -filetype=asm -mtriple=armv7-linux-gnuehabi -emulated-tls < %s \
+; RUN: | FileCheck %s --check-prefix=EMU
+
 ; Generated with clang with source
 ; __thread int x;
@@ -16,6 +19,9 @@
 ; The debug relocation of the address of the tls variable
 ; CHECK: .long x(tlsldo)
+; TODO: Add expected output for -emulated-tls tests.
+; EMU-NOT: .long x(tlsldo)
+
 !0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
 !1 = !DIFile(filename: "tls.c", directory: "/tmp")
 !2 = !{}
Index: llvm/trunk/test/DebugInfo/X86/tls.ll
===================================================================
--- llvm/trunk/test/DebugInfo/X86/tls.ll
+++ llvm/trunk/test/DebugInfo/X86/tls.ll
@@ -1,20 +1,30 @@
 ; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-linux-gnu \
-; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=GNUOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=GNUOP %s
 ; RUN: llc %s -o - -filetype=asm -O0 -mtriple=i386-linux-gnu \
-; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=SINGLE-32 --check-prefix=GNUOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-32 --check-prefix=GNUOP %s
 ; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-linux-gnu -split-dwarf=Enable \
-; RUN: | FileCheck --check-prefix=FISSION --check-prefix=GNUOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=FISSION --check-prefix=GNUOP %s
 ; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-scei-ps4 \
-; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=STDOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=STDOP %s
 ; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-apple-darwin \
-; RUN: | FileCheck --check-prefix=DARWIN --check-prefix=STDOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=DARWIN --check-prefix=STDOP %s
 ; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-freebsd \
-; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=STDOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=STDOP %s
+
+; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-linux-gnu -emulated-tls \
+; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=EMUSINGLE-64 \
+; RUN: --check-prefix=EMUGNUOP --check-prefix=EMU %s
+
+; RUN: llc %s -o - -filetype=asm -O0 -mtriple=i386-linux-gnu -emulated-tls \
+; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=EMUSINGLE-32 \
+; RUN: --check-prefix=EMUGNUOP --check-prefix=EMU %s
+
+; TODO: Add expected output for -emulated-tls tests.
 ; FIXME: add relocation and DWARF expression support to llvm-dwarfdump & use
 ; that here instead of raw assembly printing
Index: llvm/trunk/test/Transforms/GlobalOpt/tls.ll
===================================================================
--- llvm/trunk/test/Transforms/GlobalOpt/tls.ll
+++ llvm/trunk/test/Transforms/GlobalOpt/tls.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -globalopt -S | FileCheck %s
+; RUN: opt -emulated-tls < %s -globalopt -S | FileCheck %s
 declare void @wait()
 declare void @signal()