diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -69,6 +69,8 @@ return {XCOFF::RelocationType::R_TOCU, SignAndSizeForHalf16}; case MCSymbolRefExpr::VK_PPC_L: return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16}; + case MCSymbolRefExpr::VK_PPC_AIX_TLSLE: + return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForHalf16}; } } break; case PPC::fixup_ppc_half16ds: @@ -82,6 +84,8 @@ return {XCOFF::RelocationType::R_TOC, 15}; case MCSymbolRefExpr::VK_PPC_L: return {XCOFF::RelocationType::R_TOCL, 15}; + case MCSymbolRefExpr::VK_PPC_AIX_TLSLE: + return {XCOFF::RelocationType::R_TLS_LE, 15}; } } break; case PPC::fixup_ppc_br24: diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1534,6 +1534,22 @@ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::EnforceIEIO)); return; } + case PPC::ADDI8: { + // The faster non-TOC-based local-exec sequence is represented by `addi` + // with an immediate operand having the MO_TPREL_FLAG. Such an instruction + // does not otherwise arise. 
+    const MachineOperand &MO = MI->getOperand(2);
+    if ((MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) != 0) {
+      assert(
+          Subtarget->hasAIXSmallLocalExecTLS() &&
+          "addi with thread-pointer only expected with local-exec small TLS");
+      LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
+      TmpInst.setOpcode(PPC::LA8);
+      EmitToStreamer(*OutStreamer, TmpInst);
+      return;
+    }
+    break;
+  }
   }
 
   LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7654,13 +7654,6 @@
       // is already in place on the operand, so copying the operand
       // is sufficient.
       ReplaceFlags = false;
-      // For these cases, the immediate may not be divisible by 4, in
-      // which case the fold is illegal for DS-form instructions. (The
-      // other cases provide aligned addresses and are always safe.)
-      if (RequiresMod4Offset &&
-          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
-           Base.getConstantOperandVal(1) % 4 != 0))
-        continue;
       break;
     case PPC::ADDIdtprelL:
       Flags = PPCII::MO_DTPREL_LO;
@@ -7712,6 +7705,18 @@
         UpdateHBase = true;
       }
     } else {
+      // Global addresses can be folded, but only if they are sufficiently
+      // aligned.
+      if (RequiresMod4Offset) {
+        if (GlobalAddressSDNode *GA =
+                dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+          const GlobalValue *GV = GA->getGlobal();
+          Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
+          if (Alignment < 4)
+            continue;
+        }
+      }
+
+      // If we're directly folding the addend from an addi instruction, then:
+      // 1. In general, the offset on the memory access must be zero.
+      // 2.
If the addend is a constant, then it can be combined with a
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -148,6 +148,12 @@
 static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
 
+// A faster local-exec TLS access sequence (enabled with the
+// -maix-small-local-exec-tls option) can be produced for TLS variables;
+// consistent with the IBM XL compiler, we apply a max size of slightly under
+// 32KB.
+constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
+
 // FIXME: Remove this once the bug has been fixed!
 extern cl::opt<bool> ANDIGlueBug;
@@ -3355,14 +3361,16 @@
   const GlobalValue *GV = GA->getGlobal();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   bool Is64Bit = Subtarget.isPPC64();
+  bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+  bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
 
-  if (Model == TLSModel::LocalExec || Model == TLSModel::InitialExec) {
+  if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
     SDValue TLSReg;
-    if (Is64Bit)
+    if (Is64Bit) {
       // For local-exec and initial-exec on AIX (64-bit), the sequence generated
       // involves a load of the variable offset (from the TOC), followed by an
       // add of the loaded variable offset to R13 (the thread pointer).
@@ -3370,7 +3378,22 @@
       // ld reg1,var[TC](2)
       // add reg2, reg1, r13 // r13 contains the thread pointer
       TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
-    else
+
+      // With the -maix-small-local-exec-tls option, produce a faster access
+      // sequence for local-exec TLS variables where the offset from the TLS
+      // base is encoded as an immediate operand.
+ // + // We only utilize the faster local-exec access sequence when the TLS + // variable has a size within the policy limit. We treat types that are + // not sized or are empty as being over the policy size limit. + if (HasAIXSmallLocalExecTLS && IsTLSLocalExecModel) { + Type *GVType = GV->getValueType(); + if (GVType->isSized() && !GVType->isEmptyTy() && + GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <= + AIXSmallTlsPolicySizeLimit) + return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg); + } + } else { // For local-exec and initial-exec on AIX (32-bit), the sequence generated // involves loading the variable offset from the TOC, generating a call to // .__get_tpointer to get the thread pointer (which will be in R3), and @@ -3379,6 +3402,13 @@ // bla .__get_tpointer // add reg2, reg1, r3 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT); + + // We do not implement the 32-bit version of the faster access sequence + // for local-exec that is controlled by -maix-small-local-exec-tls. 
+ if (HasAIXSmallLocalExecTLS) + report_fatal_error("The small-local-exec TLS access sequence is " + "currently only supported on AIX (64-bit mode)."); + } return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset); } diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -31,22 +31,19 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP) { - const TargetMachine &TM = AP.TM; - Mangler &Mang = TM.getObjFileLowering()->getMangler(); - const DataLayout &DL = AP.getDataLayout(); - MCContext &Ctx = AP.OutContext; - - SmallString<128> Name; - if (!MO.isGlobal()) { - assert(MO.isSymbol() && "Isn't a symbol reference"); - Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); - } else { + if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); - TM.getNameWithPrefix(Name, GV, Mang); + return AP.getSymbol(GV); } - MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); + assert(MO.isSymbol() && "Isn't a symbol reference"); + + SmallString<128> Name; + const DataLayout &DL = AP.getDataLayout(); + Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); + MCContext &Ctx = AP.OutContext; + MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); return Sym; } @@ -80,6 +77,8 @@ break; } + const TargetMachine &TM = Printer.TM; + if (MO.getTargetFlags() == PPCII::MO_PLT) RefKind = MCSymbolRefExpr::VK_PLT; else if (MO.getTargetFlags() == PPCII::MO_PCREL_FLAG) @@ -94,12 +93,21 @@ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL; else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG) RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL; + else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG) { + assert(MO.isGlobal() && "Only expecting a global MachineOperand here!"); + TLSModel::Model Model = TM.getTLSModel(MO.getGlobal()); + // For the local-exec TLS model, we may generate the offset from the TLS + // 
base as an immediate operand (instead of using a TOC entry).
+      // Set the relocation type in case the result is used for purposes other
+      // than a TOC reference. In TOC reference cases, this result is discarded.
+      if (Model == TLSModel::LocalExec)
+        RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
+  }
 
   const MachineInstr *MI = MO.getParent();
   const MachineFunction *MF = MI->getMF();
   const Module *M = MF->getFunction().getParent();
   const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
-  const TargetMachine &TM = Printer.TM;
   unsigned MIOpcode = MI->getOpcode();
 
   assert((Subtarget->isUsingPCRelativeCalls() || MIOpcode != PPC::BL8_NOTOC) &&
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:   -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \
+; RUN:   | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:   -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:   -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \
+; RUN:   --check-prefix=SMALL-LOCAL-EXEC-LARGECM64
+
+@ThreadLocalVarInit = thread_local(localexec) global i8 1, align 1
+@VarInit = local_unnamed_addr global i8 87, align 1
+@IThreadLocalVarInit = internal thread_local(localexec) global i8 1, align 1
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+@c = thread_local(localexec) global [87 x i8] zeroinitializer, align 1
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-EXEC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT:    la r3, 
c[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, c[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @c) + %arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1 + ret ptr %arrayidx +} + +define void @storeITLInit(i8 noundef zeroext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) + store i8 %x, ptr %0, align 1 + ret void +} + +define void @storeTLInit(i8 noundef zeroext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit) + store i8 %x, ptr %0, align 1 + ret void +} + +define zeroext i8 @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, 
IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + ret i8 %1 +} + +define zeroext i8 @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 56 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 56 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + %2 = load i8, ptr @VarInit, align 1 + %add = add i8 %2, %1 + ret i8 %add +} + +define zeroext i8 @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz 
r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + ret i8 %1 +} + +define zeroext i8 @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 56 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 56 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + %2 = load i8, ptr @VarInit, align 1 + %add = add i8 %2, %1 + ret i8 %add +} + +define void @loadStore1(i8 noundef zeroext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi 
r3, r3, 9 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + %add = add i8 %1, 9 + store i8 %add, ptr %0, align 1 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global double 1.000000e+00, align 8 +@VarInit = local_unnamed_addr global double 8.700000e+01, align 8 +@IThreadLocalVarInit = internal thread_local(localexec) global double 1.000000e+00, align 8 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 +@f = thread_local(localexec) global [87 x double] zeroinitializer, align 8 + +define nonnull ptr @AddrTest1() local_unnamed_addr #0 { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, f[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 48 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, f[TL]@le(r13) +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 48 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @f) + %arrayidx = getelementptr inbounds [87 x double], ptr %0, i64 0, i64 6 + ret ptr %arrayidx +} + +define void @storeITLInit(double noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + store double %x, ptr %0, align 8 + ret void +} + +define void @storeTLInit(double noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfd f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfd f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + store double %x, ptr %0, align 8 + ret void +} + +define double @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr 
+entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + ret double %1 +} + +define double @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xsadddp f1, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xsadddp f1, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + %2 = load double, ptr @VarInit, align 8 + %add = fadd double %1, %2 + ret double %add +} + +define double @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + ret double %1 +} + +define double @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; 
SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f0, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xsadddp f1, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f0, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xsadddp f1, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + %2 = load double, ptr @VarInit, align 8 + %add = fadd double %1, %2 + ret double %add +} + +define void @loadStore1(double noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: vspltisw v2, 1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xvcvsxwdp vs0, vs34 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: vspltisw v2, 8 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xsadddp f0, f1, f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xvcvsxwdp vs1, vs34 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xsadddp f0, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfd f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: vspltisw v2, 1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xvcvsxwdp vs0, vs34 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: vspltisw v2, 8 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xsadddp f0, f1, f0 +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: xvcvsxwdp vs1, vs34 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xsadddp f0, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfd f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + %inc = fadd double %1, 1.000000e+00 + %add = fadd double %inc, 8.000000e+00 + store double %add, ptr %0, align 8 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global float 1.000000e+00, align 4 +@VarInit = local_unnamed_addr global float 8.700000e+01, align 4 +@IThreadLocalVarInit = internal thread_local(localexec) global float 1.000000e+00, align 4 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 +@e = thread_local(localexec) global [87 x float] zeroinitializer, align 4 + +define nonnull ptr @AddrTest1() local_unnamed_addr #0 { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, e[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 16 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; 
+; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, e[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 16 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @e) + %arrayidx = getelementptr inbounds [87 x float], ptr %0, i64 0, i64 4 + ret ptr %arrayidx +} + +define void @storeITLInit(float noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + store float %x, ptr %0, align 4 + ret void +} + +define void @storeTLInit(float noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfs f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfs f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + store float %x, ptr %0, align 4 + ret void +} + +define float @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; 
SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + ret float %1 +} + +define float @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fadds f1, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fadds f1, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + %2 = load float, ptr @VarInit, align 4 + %add = fadd float %1, %2 + ret float %add +} + +define float @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + ret float %1 
+} + +define float @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f0, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fadds f1, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f0, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fadds f1, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + %2 = load float, ptr @VarInit, align 4 + %add = fadd float %1, %2 + ret float %add +} + +define void @loadStore1(float noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: vspltisw v2, 1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xvcvsxwdp vs0, vs34 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: vspltisw v2, 8 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xvcvsxwdp vs2, vs34 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fadds f0, f1, f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fadds f0, f0, f2 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfs f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: vspltisw v2, 1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: 
xvcvsxwdp vs0, vs34 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: vspltisw v2, 8 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xvcvsxwdp vs2, vs34 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fadds f0, f1, f0 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fadds f0, f0, f2 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfs f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + %inc = fadd float %1, 1.000000e+00 + %add = fadd float %inc, 8.000000e+00 + store float %add, ptr %0, align 4 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4 +@VarInit = local_unnamed_addr global i32 87, align 4 +@IThreadLocalVarInit = internal thread_local(localexec) global i32 1, align 4 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 +%struct.anon = type { i32 } +@ThreadLocalStruct = thread_local(localexec) global %struct.anon zeroinitializer, align 1 +@a = thread_local(localexec) global [87 x i32] zeroinitializer, align 4 + +define nonnull ptr @AddrTest1() local_unnamed_addr #0 { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1: +; 
SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, a[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 12 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, a[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 12 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) + %arrayidx = getelementptr inbounds [87 x i32], ptr %0, i64 0, i64 3 + ret ptr %arrayidx +} + +define signext i32 @testUnaligned() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: testUnaligned: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, ThreadLocalStruct[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwa r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: testUnaligned: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, ThreadLocalStruct[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwa r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct) + %x = getelementptr inbounds %struct.anon, ptr %0, i32 0, i32 0 + %1 = load i32, ptr %x, align 1 + ret i32 %1 +} + +define void @storeITLInit(i32 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + store i32 
%x, ptr %0, align 4 + ret void +} + +define void @storeTLInit(i32 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + store i32 %x, ptr %0, align 4 + ret void +} + +define signext i32 @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwa r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwa r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + ret i32 %1 +} + +define signext i32 @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: extsw r3, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r4, IThreadLocalVarInit[TL]@le(r13) +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: extsw r3, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + %2 = load i32, ptr @VarInit, align 4 + %add = add nsw i32 %2, %1 + ret i32 %add +} + +define signext i32 @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwa r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwa r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + ret i32 %1 +} + +define signext i32 @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: extsw r3, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: extsw r3, r3 +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + %2 = load i32, ptr @VarInit, align 4 + %add = add nsw i32 %2, %1 + ret i32 %add +} + +define void @loadStore1(i32 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + %add = add nsw i32 %1, 9 + store i32 %add, ptr %0, align 4 + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll @@ -0,0 +1,249 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: 
--check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +; Test disassembly of object. +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+aix-small-local-exec-tls \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -xcoff-traceback-table=false \ +; RUN: --code-model=large -filetype=obj -o %t.o < %s +; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s + +@mySmallLocalExecTLSv1 = thread_local(localexec) global [8187 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLS2 = thread_local(localexec) global [4000 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLS3 = thread_local(localexec) global [4000 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLS4 = thread_local(localexec) global [4000 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLS5 = thread_local(localexec) global [4000 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLSv2 = thread_local(localexec) global [9000 x i32] zeroinitializer, align 4 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 + +; All accesses use a "faster" local-exec sequence directly off the thread pointer. 
+define signext i32 @StoreArrays1() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreArrays1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, mySmallLocalExecTLSv1[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r5, 4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r6, mySmallLocalExecTLS2[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r7, 2 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS3[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 24(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r7, 320(r6) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 324(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS4[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r6, mySmallLocalExecTLS5[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r7, 88 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 102 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 328(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r7, 332(r6) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreArrays1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, mySmallLocalExecTLSv1[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r5, 4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r6, mySmallLocalExecTLS2[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r7, 2 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS3[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 24(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r7, 320(r6) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 324(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS4[TL]@le(r13) +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r6, mySmallLocalExecTLS5[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r7, 88 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 102 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 328(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r7, 332(r6) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLSv1) + store i32 1, ptr %0, align 4 + %arrayidx1 = getelementptr inbounds [8187 x i32], ptr %0, i64 0, i64 6 + store i32 4, ptr %arrayidx1, align 4 + %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS2) + %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80 + store i32 2, ptr %arrayidx2, align 4 + %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS3) + %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81 + store i32 3, ptr %arrayidx3, align 4 + %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS4) + %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82 + store i32 4, ptr %arrayidx4, align 4 + %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS5) + %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83 + store i32 88, ptr %arrayidx5, align 4 + %5 = load i32, ptr %0, align 4 + %6 = load i32, ptr %arrayidx1, align 4 + %7 = load i32, ptr %arrayidx2, align 4 + %8 = load i32, ptr %arrayidx3, align 4 + %9 = load i32, ptr %arrayidx4, align 4 + %add = add i32 %5, 88 + %add9 = add i32 %add, %6 + %add11 = add i32 %add9, %7 + %add13 = add i32 %add11, %8 + %add15 = add i32 %add13, %9 + ret i32 %add15 +} + +; Example of one access using the regular local-exec access from the TOC. 
+define signext i32 @StoreArrays2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreArrays2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallLocalExecTLSv2 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r5, 4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r6, mySmallLocalExecTLS2[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r7, 2 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r13, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS3[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 24(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r7, 320(r6) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 324(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS4[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r6, mySmallLocalExecTLS5[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r7, 88 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 102 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 328(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r7, 332(r6) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreArrays2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r5, 4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r6, mySmallLocalExecTLS2[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r7, 2 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r13, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS3[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 24(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r7, 320(r6) +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 324(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS4[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r6, mySmallLocalExecTLS5[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r7, 88 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 102 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 328(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r7, 332(r6) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLSv2) + store i32 1, ptr %0, align 4 + %arrayidx1 = getelementptr inbounds [9000 x i32], ptr %0, i64 0, i64 6 + store i32 4, ptr %arrayidx1, align 4 + %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS2) + %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80 + store i32 2, ptr %arrayidx2, align 4 + %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS3) + %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81 + store i32 3, ptr %arrayidx3, align 4 + %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS4) + %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82 + store i32 4, ptr %arrayidx4, align 4 + %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS5) + %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83 + store i32 88, ptr %arrayidx5, align 4 + %5 = load i32, ptr %0, align 4 + %6 = load i32, ptr %arrayidx1, align 4 + %7 = load i32, ptr %arrayidx2, align 4 + %8 = load i32, ptr %arrayidx3, align 4 + %9 = load i32, ptr %arrayidx4, align 4 + %add = add i32 %5, 88 + %add9 = add i32 %add, %6 + %add11 = add i32 %add9, %7 + %add13 = add i32 %add11, %8 + %add15 = add i32 %add13, %9 + ret i32 %add15 +} + +; DIS: {{.*}}aix-small-local-exec-tls-largeaccess.ll.tmp.o: file format aix5coff64-rs6000 +; DIS: Disassembly of section .text: +; 
DIS: 0000000000000000 (idx: 3) .StoreArrays1: +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 3, 13, 0 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 15) mySmallLocalExecTLSv1[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 1 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 5, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 6, 13, 32748 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 17) mySmallLocalExecTLS2[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 7, 2 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(13) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 15) mySmallLocalExecTLSv1[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, -16788 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 19) mySmallLocalExecTLS3[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 24(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 7, 320(6) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 324(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, -788 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 21) mySmallLocalExecTLS4[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 6, 13, 15212 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 23) mySmallLocalExecTLS5[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 7, 88 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 102 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 328(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 7, 332(6) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr + +; DIS: 0000000000000050 (idx: 5) .StoreArrays2: +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 13) mySmallLocalExecTLSv2[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 1 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 5, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 6, 13, 32748 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 17) mySmallLocalExecTLS2[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 7, 2 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 0(3) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 13) mySmallLocalExecTLSv2[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 
3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, -16788 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 19) mySmallLocalExecTLS3[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 24(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 7, 320(6) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 324(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, -788 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 21) mySmallLocalExecTLS4[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 6, 13, 15212 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 23) mySmallLocalExecTLS5[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 7, 88 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 102 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 328(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 7, 332(6) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr + +; DIS: Disassembly of section .data: +; DIS: 00000000000000a0 (idx: 7) StoreArrays1[DS]: +; DIS-NEXT: a0: 00 00 00 00 +; DIS-NEXT: 00000000000000a0: R_POS (idx: 3) .StoreArrays1 +; DIS-NEXT: a4: 00 00 00 00 +; DIS-NEXT: a8: 00 00 00 00 +; DIS-NEXT: 00000000000000a8: R_POS (idx: 11) TOC[TC0] +; DIS-NEXT: ac: 00 00 00 d0 + +; DIS: 00000000000000b8 (idx: 9) StoreArrays2[DS]: +; DIS-NEXT: b8: 00 00 00 00 +; DIS-NEXT: 00000000000000b8: R_POS (idx: 5) .StoreArrays2 +; DIS-NEXT: bc: 00 00 00 50 +; DIS-NEXT: c0: 00 00 00 00 +; DIS-NEXT: 00000000000000c0: R_POS (idx: 11) TOC[TC0] +; DIS-NEXT: c4: 00 00 00 d0 + +; DIS: 00000000000000d0 (idx: 13) mySmallLocalExecTLSv2[TE]: +; DIS-NEXT: d0: 00 00 00 00 +; DIS-NEXT: 00000000000000d0: R_TLS_LE (idx: 25) mySmallLocalExecTLSv2[TL] +; DIS-NEXT: d4: 00 01 79 ec diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-longlong.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-longlong.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8 +@VarInit = local_unnamed_addr global i64 87, align 8 +@IThreadLocalVarInit = internal thread_local(localexec) global i64 1, align 8 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 +%struct.anon = type { i64 } +@ThreadLocalStruct = thread_local(localexec) global %struct.anon zeroinitializer, align 1 +@d = thread_local(localexec) global [87 x i64] zeroinitializer, align 8 + +define nonnull ptr @AddrTest1() local_unnamed_addr #0 { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, d[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, d[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @d) + ret ptr %0 +} + +define i64 @testUnaligned() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: testUnaligned: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, ThreadLocalStruct[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: testUnaligned: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, 
ThreadLocalStruct[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct) + %x = getelementptr inbounds %struct.anon, ptr %0, i32 0, i32 0 + %1 = load i64, ptr %x, align 1 + ret i64 %1 +} + +define void @storeITLInit(i64 noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + store i64 %x, ptr %0, align 8 + ret void +} + +define void @storeTLInit(i64 noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + store i64 %x, ptr %0, align 8 + ret void +} + +define i64 @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, IThreadLocalVarInit[TL]@le(r13) 
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + ret i64 %1 +} + +define i64 @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + %2 = load i64, ptr @VarInit, align 8 + %add = add nsw i64 %2, %1 + ret i64 %add +} + +define i64 @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + ret i64 %1 +} + +define i64 @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; 
SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + %2 = load i64, ptr @VarInit, align 8 + %add = add nsw i64 %2, %1 + ret i64 %add +} + +define void @loadStore1(i64 noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + %add = add nsw i64 %1, 9 + store i64 %add, ptr %0, align 8 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll new 
file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll @@ -0,0 +1,173 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global i16 1, align 2 +@VarInit = local_unnamed_addr global i16 87, align 2 +@IThreadLocalVarInit = internal thread_local(localexec) global i16 1, align 2 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 +@b = thread_local(localexec) global [87 x i16] zeroinitializer, align 2 + +define nonnull ptr @AddrTest1() local_unnamed_addr #0 { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, b[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, b[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b) + %arrayidx = getelementptr inbounds [87 x i16], ptr %0, i64 0, i64 2 + ret ptr %arrayidx +} + +define void @storeITLInit(i16 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: sth r3, IThreadLocalVarInit[TL]@le(r13) +; 
SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: sth r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit) + store i16 %x, ptr %0, align 2 + ret void +} + +define void @storeTLInit(i16 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: sth r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: sth r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit) + store i16 %x, ptr %0, align 2 + ret void +} + +define signext i16 @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lha r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lha r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + ret i16 %1 +} + +define signext i16 @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r3, 0(r3) +; 
SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: extsh r3, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: extsh r3, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + %2 = load i16, ptr @VarInit, align 2 + %add = add i16 %2, %1 + ret i16 %add +} + +define signext i16 @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lha r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lha r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + ret i16 %1 +} + +define signext i16 @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: extsh r3, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: 
# %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: extsh r3, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + %2 = load i16, ptr @VarInit, align 2 + %add = add i16 %2, %1 + ret i16 %add +} + +define void @loadStore1(i16 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: sth r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: sth r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + %add = add i16 %1, 9 + store i16 %add, ptr %0, align 2 + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll @@ -11,6 +11,13 @@ ; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE32-O0 +; RUN: llc -O0 
-verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 @TLInt = internal thread_local(localexec) global i32 0, align 4 @TLLongLong = internal thread_local(localexec) global i64 0, align 8 @@ -70,6 +77,20 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeInt: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, TLInt[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 0(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeInt: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, TLInt[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 0(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt) store i32 %x, ptr %0, align 4 @@ -132,6 +153,18 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeLongLong: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, TLLongLong[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, 0(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeLongLong: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, TLLongLong[UL]@le(r13) +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, 0(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong) store i64 %x, ptr %0, align 8 @@ -182,6 +215,18 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeDouble: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLDouble[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stxsdx f1, 0, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeDouble: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLDouble[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stxsdx f1, 0, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble) store double %x, ptr %0, align 8 @@ -232,6 +277,18 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeFloat: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLFloat[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfs f1, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeFloat: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLFloat[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfs f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat) store float %x, ptr %0, align 4 @@ -282,6 +339,18 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadInt: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLInt[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, 0(r3) +; 
SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadInt: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLInt[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt) %1 = load i32, ptr %0, align 4 @@ -336,6 +405,22 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadLongLong: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLLongLong[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadLongLong: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLLongLong[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong) %1 = load i64, ptr %0, align 8 @@ -407,6 +492,30 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadDouble: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLDouble[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f0, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: # kill: def $f1 killed $f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xscvdpsxws f0, f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r1, -12 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfiwx f0, 0, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, -12(r1) +; 
SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadDouble: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLDouble[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f0, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: # kill: def $f1 killed $f0 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xscvdpsxws f0, f0 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r1, -12 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfiwx f0, 0, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, -12(r1) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble) %1 = load double, ptr %0, align 8 @@ -474,6 +583,26 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadFloat: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLFloat[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f0, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fctiwz f0, f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfd f0, -8(r1) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwa r3, -4(r1) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadFloat: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLFloat[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f0, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fctiwz f0, f0 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfd f0, -8(r1) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwa r3, -4(r1) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat) %1 = load float, ptr %0, align 4 diff --git a/llvm/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll 
b/llvm/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll @@ -37,7 +37,7 @@ ; CHECK-DAG: addis [[REG1:[0-9]+]], 13, tls_something@tprel@ha ; CHECK-DAG: std 2, 40(1) ; CHECK-DAG: addi [[REG3:[0-9]+]], [[REG1]], tls_something@tprel@l -; CHECK-DAG: ld [[REG2:[0-9]+]], 0([[REG3]]) +; CHECK-DAG: ld [[REG2:[0-9]+]], tls_something@tprel@l([[REG1]]) ; CHECK-DAG: ld 11, 16([[REG3]]) ; CHECK-DAG: ld 2, 8([[REG3]]) ; CHECK-DAG: mtctr [[REG2]]