diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -69,6 +69,8 @@ return {XCOFF::RelocationType::R_TOCU, SignAndSizeForHalf16}; case MCSymbolRefExpr::VK_PPC_L: return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16}; + case MCSymbolRefExpr::VK_PPC_AIX_TLSLE: + return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForHalf16}; } } break; case PPC::fixup_ppc_half16ds: @@ -82,6 +84,8 @@ return {XCOFF::RelocationType::R_TOC, 15}; case MCSymbolRefExpr::VK_PPC_L: return {XCOFF::RelocationType::R_TOCL, 15}; + case MCSymbolRefExpr::VK_PPC_AIX_TLSLE: + return {XCOFF::RelocationType::R_TLS_LE, 15}; } } break; case PPC::fixup_ppc_br24: diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -198,6 +198,7 @@ void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI); void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); void EmitAIXTlsCallHelper(const MachineInstr *MI); + bool emitAIXSmallLocalExecTLSAccess(const MachineInstr *MI, MCInst &TmpInst); bool runOnMachineFunction(MachineFunction &MF) override { Subtarget = &MF.getSubtarget(); bool Changed = AsmPrinter::runOnMachineFunction(MF); @@ -1521,6 +1522,9 @@ if (MO.getGlobal()->getPointerAlignment(DL) < 4) llvm_unreachable("Global must be word-aligned for LD, STD, LWA!"); } + // Handle assembly printing for -maix-small-local-exec-tls on AIX (64-bit). + if (emitAIXSmallLocalExecTLSAccess(MI, TmpInst)) + return; // Now process the instruction normally. 
break; } @@ -1534,12 +1538,62 @@ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::EnforceIEIO)); return; } + case PPC::ADDI8: + case PPC::LBZ: + case PPC::STB: + case PPC::LBZ8: + case PPC::STB8: + case PPC::LHA: + case PPC::LHZ: + case PPC::STH: + case PPC::LWZ: + case PPC::STW: + case PPC::LHA8: + case PPC::LHZ8: + case PPC::STH8: + case PPC::LWZ8: + case PPC::STW8: + case PPC::LFS: + case PPC::STFS: + case PPC::LFD: + case PPC::STFD: { + // Handle assembly printing for -maix-small-local-exec-tls on AIX (64-bit). + if (emitAIXSmallLocalExecTLSAccess(MI, TmpInst)) + return; + break; + } } LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); EmitToStreamer(*OutStreamer, TmpInst); } +bool PPCAsmPrinter::emitAIXSmallLocalExecTLSAccess(const MachineInstr *MI, + MCInst &TmpInst) { + unsigned OrigMIOpc = MI->getOpcode(); + bool IsMIADDI8 = OrigMIOpc == PPC::ADDI8; + unsigned OpNum = IsMIADDI8 ? 2 : 1; + const MachineOperand &MO = MI->getOperand(OpNum); + + // Only handle custom assembly printing when compiling on AIX, + // with -maix-small-local-exec-tls on. + if (!Subtarget->hasAIXSmallLocalExecTLS()) + return false; + + // Handle small-local-exec-tls accesses only for the local-exec TLS model, + // which is represented by the MO_TPREL_FLAG target flag. + if (!(MO.getTargetFlags() & PPCII::MO_TPREL_FLAG)) + return false; + + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); + // Change the opcode to load address if the original opcode is addi. 
+ if (IsMIADDI8) + TmpInst.setOpcode(PPC::LA8); + + EmitToStreamer(*OutStreamer, TmpInst); + return true; +} + void PPCLinuxAsmPrinter::emitGNUAttributes(Module &M) { // Emit float ABI into GNU attribute Metadata *MD = M.getModuleFlag("float-abi"); diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -7530,6 +7530,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); + bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS(); while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; @@ -7619,7 +7620,7 @@ // For these cases, the immediate may not be divisible by 4, in // which case the fold is illegal for DS-form instructions. (The // other cases provide aligned addresses and are always safe.) - if (RequiresMod4Offset && + if (RequiresMod4Offset && !HasAIXSmallLocalExecTLS && (!isa(Base.getOperand(1)) || Base.getConstantOperandVal(1) % 4 != 0)) continue; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -149,6 +149,11 @@ static const char AIXSSPCanaryWordName[] = "__ssp_canary_word"; +// A faster local-exec TLS access sequence (enabled with the +// -maix-small-local-exec-tls option) can be produced for TLS variables with a +// size of 32KB or under. +static uint64_t AIXTLSUpperDisplacement = 32760; + // FIXME: Remove this once the bug has been fixed! 
extern cl::opt ANDIGlueBug; @@ -3329,6 +3334,7 @@ const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool Is64Bit = Subtarget.isPPC64(); + bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); if (Model == TLSModel::LocalExec) { @@ -3336,7 +3342,7 @@ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG); SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA); SDValue TLSReg; - if (Is64Bit) + if (Is64Bit) { // For local-exec on AIX (64-bit), the sequence that is generated involves // a load of the variable offset (from the TOC), followed by an add of the // loaded variable offset to R13 (the thread pointer). @@ -3344,7 +3350,22 @@ // ld reg1,var[TC](2) // add reg2, reg1, r13 // r13 contains the thread pointer TLSReg = DAG.getRegister(PPC::X13, MVT::i64); - else + + // Produce a faster access sequence for local-exec TLS variables where + // the offset from the TLS base is encoded as an immediate operand. + // This is available on 64-bit mode only. + // + // In order to utilize a faster local-exec access sequence (enabled by the + // -maix-small-local-exec-tls option), the size of the TLS variable that + // is being considered for the faster sequence needs to be roughly + // under 32KB (represented by AIXTLSUpperDisplacement). The size needs + // to be slightly less than 32KB due to additional overhead. 
+ uint64_t GVTypeSize = + GV->getParent()->getDataLayout().getTypeAllocSize(GV->getValueType()); + if (HasAIXSmallLocalExecTLS && + (GVTypeSize < (AIXTLSUpperDisplacement))) + return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg); + } else { // For local-exec on AIX (32-bit), the sequence that is generated involves // loading the variable offset from the TOC, generating a call to // .__get_tpointer to get the thread pointer (which will be in R3), and @@ -3353,6 +3374,13 @@ // bla .__get_tpointer // add reg2, reg1, r3 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT); + + // The faster access sequence for local-exec controlled by + // -maix-small-local-exec-tls is not available on 32-bit mode. + if (HasAIXSmallLocalExecTLS) + report_fatal_error("The small-local-exec TLS access sequence is " + "currently only supported on AIX (64-bit mode)."); + } return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset); } diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -47,6 +47,20 @@ return TM.getSymbol(GV); TM.getNameWithPrefix(Name, GV, Mang); + + // For accesses with -maix-small-local-exec-tls on AIX, map the operand to + // its corresponding MOSymbol and ensure the symbol has the form: + // variableName[storageClass]. The offset information from the machine + // operand is also preserved in the symbol. 
+ if (MO.getParent() + ->getMF() + ->getSubtarget() + .hasAIXSmallLocalExecTLS() && + MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) { + MCSymbol *Sym = AP.getSymbol(GV); + Sym->setOffset(MO.getOffset()); + return Sym; + } } MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); @@ -98,6 +112,8 @@ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL; else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG) RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL; + else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG) + RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE; const MachineInstr *MI = MO.getParent(); const MachineFunction *MF = MI->getMF(); diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll @@ -0,0 +1,153 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global i8 1, align 1 +@VarInit = local_unnamed_addr global i8 87, align 1 +@IThreadLocalVarInit = internal thread_local(localexec) global i8 1, align 1 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 + +define void @storeITLInit(i8 noundef zeroext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; 
SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) + store i8 %x, ptr %0, align 1 + ret void +} + +define void @storeTLInit(i8 noundef zeroext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit) + store i8 %x, ptr %0, align 1 + ret void +} + +define zeroext i8 @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + ret i8 %1 +} + +define zeroext i8 @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; 
SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 56 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 56 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + %2 = load i8, ptr @VarInit, align 1 + %add = add i8 %2, %1 + ret i8 %add +} + +define zeroext i8 @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + ret i8 %1 +} + +define zeroext i8 @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 56 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 56 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + %2 = load i8, ptr @VarInit, align 1 + %add = add i8 %2, %1 + ret i8 %add +} + +define void @loadStore1(i8 noundef zeroext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) + %1 = load i8, ptr %0, align 1 + %add = add i8 %1, 9 + store i8 %add, ptr %0, align 1 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff 
-mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global double 1.000000e+00, align 8 +@VarInit = local_unnamed_addr global double 8.700000e+01, align 8 +@IThreadLocalVarInit = internal thread_local(localexec) global double 1.000000e+00, align 8 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 + +define void @storeITLInit(double noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + store double %x, ptr %0, align 8 + ret void +} + +define void @storeTLInit(double noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfd f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfd f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + store double %x, ptr %0, align 8 + ret void +} + +define double @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: 
loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + ret double %1 +} + +define double @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xsadddp f1, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xsadddp f1, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + %2 = load double, ptr @VarInit, align 8 + %add = fadd double %1, %2 + ret double %add +} + +define double @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + ret double %1 +} + +define double @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f0, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xsadddp f1, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f0, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xsadddp f1, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + %2 = load double, ptr @VarInit, align 8 + %add = fadd double %1, %2 + ret double %add +} + +define void @loadStore1(double noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: vspltisw v2, 1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xvcvsxwdp vs0, vs34 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: vspltisw v2, 8 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xsadddp f0, f1, f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xvcvsxwdp vs1, vs34 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xsadddp f0, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfd f0, IThreadLocalVarInit[TL]@le(r13) +; 
SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: vspltisw v2, 1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xvcvsxwdp vs0, vs34 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: vspltisw v2, 8 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xsadddp f0, f1, f0 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xvcvsxwdp vs1, vs34 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xsadddp f0, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfd f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load double, ptr %0, align 8 + %inc = fadd double %1, 1.000000e+00 + %add = fadd double %inc, 8.000000e+00 + store double %add, ptr %0, align 8 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global float 1.000000e+00, align 4 +@VarInit = local_unnamed_addr global float 8.700000e+01, align 4 +@IThreadLocalVarInit = internal thread_local(localexec) global float 1.000000e+00, align 4 +declare nonnull 
ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 + +define void @storeITLInit(float noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + store float %x, ptr %0, align 4 + ret void +} + +define void @storeTLInit(float noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfs f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfs f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + store float %x, ptr %0, align 4 + ret void +} + +define float @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + ret float %1 +} + +define float @loadITLInit2() { +; 
SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fadds f1, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fadds f1, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + %2 = load float, ptr @VarInit, align 4 + %add = fadd float %1, %2 + ret float %add +} + +define float @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + ret float %1 +} + +define float @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f0, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, 0(r3) +; 
SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fadds f1, f0, f1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f0, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fadds f1, f0, f1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + %2 = load float, ptr @VarInit, align 4 + %add = fadd float %1, %2 + ret float %add +} + +define void @loadStore1(float noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: vspltisw v2, 1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xvcvsxwdp vs0, vs34 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: vspltisw v2, 8 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xvcvsxwdp vs2, vs34 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fadds f0, f1, f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fadds f0, f0, f2 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfs f0, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: vspltisw v2, 1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f1, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xvcvsxwdp vs0, vs34 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: vspltisw v2, 8 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: xvcvsxwdp vs2, vs34 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fadds f0, f1, f0 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fadds f0, f0, f2 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfs f0, IThreadLocalVarInit[TL]@le(r13) +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load float, ptr %0, align 4 + %inc = fadd float %1, 1.000000e+00 + %add = fadd float %inc, 8.000000e+00 + store float %add, ptr %0, align 4 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4 +@VarInit = local_unnamed_addr global i32 87, align 4 +@IThreadLocalVarInit = internal thread_local(localexec) global i32 1, align 4 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 + +define void @storeITLInit(i32 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + store i32 %x, ptr %0, align 
4 + ret void +} + +define void @storeTLInit(i32 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + store i32 %x, ptr %0, align 4 + ret void +} + +define signext i32 @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwa r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwa r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + ret i32 %1 +} + +define signext i32 @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: extsw r3, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r4, IThreadLocalVarInit[TL]@le(r13) +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: extsw r3, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + %2 = load i32, ptr @VarInit, align 4 + %add = add nsw i32 %2, %1 + ret i32 %add +} + +define signext i32 @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwa r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwa r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + ret i32 %1 +} + +define signext i32 @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: extsw r3, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: extsw r3, r3 +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + %2 = load i32, ptr @VarInit, align 4 + %add = add nsw i32 %2, %1 + ret i32 %add +} + +define void @loadStore1(i32 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) + %1 = load i32, ptr %0, align 4 + %add = add nsw i32 %1, 9 + store i32 %add, ptr %0, align 4 + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll @@ -0,0 +1,249 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: 
--check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +; Test disassembly of object. +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+aix-small-local-exec-tls \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -xcoff-traceback-table=false \ +; RUN: --code-model=large -filetype=obj -o %t.o < %s +; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s + +@mySmallLocalExecTLSv1 = thread_local(localexec) global [8187 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLS2 = thread_local(localexec) global [4000 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLS3 = thread_local(localexec) global [4000 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLS4 = thread_local(localexec) global [4000 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLS5 = thread_local(localexec) global [4000 x i32] zeroinitializer, align 4 +@mySmallLocalExecTLSv2 = thread_local(localexec) global [9000 x i32] zeroinitializer, align 4 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 + +; All accesses use a "faster" local-exec sequence directly off the thread pointer. 
+define signext i32 @StoreArrays1() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreArrays1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, mySmallLocalExecTLSv1[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r5, 4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r6, mySmallLocalExecTLS2[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r7, 2 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS3[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 24(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r7, 320(r6) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 324(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS4[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r6, mySmallLocalExecTLS5[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r7, 88 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 102 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 328(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r7, 332(r6) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreArrays1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, mySmallLocalExecTLSv1[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r5, 4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r6, mySmallLocalExecTLS2[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r7, 2 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS3[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 24(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r7, 320(r6) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 324(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS4[TL]@le(r13) +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r6, mySmallLocalExecTLS5[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r7, 88 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 102 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 328(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r7, 332(r6) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLSv1) + store i32 1, ptr %0, align 4 + %arrayidx1 = getelementptr inbounds [8187 x i32], ptr %0, i64 0, i64 6 + store i32 4, ptr %arrayidx1, align 4 + %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS2) + %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80 + store i32 2, ptr %arrayidx2, align 4 + %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS3) + %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81 + store i32 3, ptr %arrayidx3, align 4 + %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS4) + %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82 + store i32 4, ptr %arrayidx4, align 4 + %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS5) + %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83 + store i32 88, ptr %arrayidx5, align 4 + %5 = load i32, ptr %0, align 4 + %6 = load i32, ptr %arrayidx1, align 4 + %7 = load i32, ptr %arrayidx2, align 4 + %8 = load i32, ptr %arrayidx3, align 4 + %9 = load i32, ptr %arrayidx4, align 4 + %add = add i32 %5, 88 + %add9 = add i32 %add, %6 + %add11 = add i32 %add9, %7 + %add13 = add i32 %add11, %8 + %add15 = add i32 %add13, %9 + ret i32 %add15 +} + +; Example of one access using the regular local-exec access from the TOC. 
+define signext i32 @StoreArrays2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreArrays2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallLocalExecTLSv2 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 1 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r5, 4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r6, mySmallLocalExecTLS2[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r7, 2 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r13, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS3[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 24(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r7, 320(r6) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 324(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS4[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r6, mySmallLocalExecTLS5[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r7, 88 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 102 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 328(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r7, 332(r6) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreArrays2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 1 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r5, 4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r6, mySmallLocalExecTLS2[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r7, 2 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r13, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS3[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 24(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r7, 320(r6) +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 324(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS4[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r6, mySmallLocalExecTLS5[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r7, 88 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 102 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 328(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r7, 332(r6) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLSv2) + store i32 1, ptr %0, align 4 + %arrayidx1 = getelementptr inbounds [9000 x i32], ptr %0, i64 0, i64 6 + store i32 4, ptr %arrayidx1, align 4 + %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS2) + %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80 + store i32 2, ptr %arrayidx2, align 4 + %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS3) + %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81 + store i32 3, ptr %arrayidx3, align 4 + %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS4) + %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82 + store i32 4, ptr %arrayidx4, align 4 + %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalExecTLS5) + %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83 + store i32 88, ptr %arrayidx5, align 4 + %5 = load i32, ptr %0, align 4 + %6 = load i32, ptr %arrayidx1, align 4 + %7 = load i32, ptr %arrayidx2, align 4 + %8 = load i32, ptr %arrayidx3, align 4 + %9 = load i32, ptr %arrayidx4, align 4 + %add = add i32 %5, 88 + %add9 = add i32 %add, %6 + %add11 = add i32 %add9, %7 + %add13 = add i32 %add11, %8 + %add15 = add i32 %add13, %9 + ret i32 %add15 +} + +; DIS: {{.*}}aix-small-local-exec-tls-largeaccess.ll.tmp.o: file format aix5coff64-rs6000 +; DIS: Disassembly of section .text: +; 
DIS: 0000000000000000 (idx: 3) .StoreArrays1: +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 3, 13, 0 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 15) mySmallLocalExecTLSv1[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 1 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 5, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 6, 13, 32748 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 17) mySmallLocalExecTLS2[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 7, 2 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(13) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 15) mySmallLocalExecTLSv1[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, -16788 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 19) mySmallLocalExecTLS3[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 24(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 7, 320(6) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 324(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, -788 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 21) mySmallLocalExecTLS4[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 6, 13, 15212 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 23) mySmallLocalExecTLS5[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 7, 88 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 102 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 328(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 7, 332(6) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr + +; DIS: 0000000000000050 (idx: 5) .StoreArrays2: +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 13) mySmallLocalExecTLSv2[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 1 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 5, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 6, 13, 32748 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 17) mySmallLocalExecTLS2[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 7, 2 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 0(3) +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 13) mySmallLocalExecTLSv2[TE] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 
3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, -16788 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 19) mySmallLocalExecTLS3[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 24(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 7, 320(6) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 324(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, -788 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 21) mySmallLocalExecTLS4[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 6, 13, 15212 +; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 23) mySmallLocalExecTLS5[TL] +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 7, 88 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 102 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 328(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 7, 332(6) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr + +; DIS: Disassembly of section .data: +; DIS: 00000000000000a0 (idx: 7) StoreArrays1[DS]: +; DIS-NEXT: a0: 00 00 00 00 +; DIS-NEXT: 00000000000000a0: R_POS (idx: 3) .StoreArrays1 +; DIS-NEXT: a4: 00 00 00 00 +; DIS-NEXT: a8: 00 00 00 00 +; DIS-NEXT: 00000000000000a8: R_POS (idx: 11) TOC[TC0] +; DIS-NEXT: ac: 00 00 00 d0 + +; DIS: 00000000000000b8 (idx: 9) StoreArrays2[DS]: +; DIS-NEXT: b8: 00 00 00 00 +; DIS-NEXT: 00000000000000b8: R_POS (idx: 5) .StoreArrays2 +; DIS-NEXT: bc: 00 00 00 50 +; DIS-NEXT: c0: 00 00 00 00 +; DIS-NEXT: 00000000000000c0: R_POS (idx: 11) TOC[TC0] +; DIS-NEXT: c4: 00 00 00 d0 + +; DIS: 00000000000000d0 (idx: 13) mySmallLocalExecTLSv2[TE]: +; DIS-NEXT: d0: 00 00 00 00 +; DIS-NEXT: 00000000000000d0: R_TLS_LE (idx: 25) mySmallLocalExecTLSv2[TL] +; DIS-NEXT: d4: 00 01 79 ec diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-longlong.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-longlong.ll @@ -0,0 +1,149 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8 +@VarInit = local_unnamed_addr global i64 87, align 8 +@IThreadLocalVarInit = internal thread_local(localexec) global i64 1, align 8 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 + +define void @storeITLInit(i64 noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + store i64 %x, ptr %0, align 8 + ret void +} + +define void @storeTLInit(i64 noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + store 
i64 %x, ptr %0, align 8 + ret void +} + +define i64 @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + ret i64 %1 +} + +define i64 @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + %2 = load i64, ptr @VarInit, align 8 + %add = add nsw i64 %2, %1 + ret i64 %add +} + +define i64 @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: 
loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + ret i64 %1 +} + +define i64 @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + %2 = load i64, ptr @VarInit, align 8 + %add = add nsw i64 %2, %1 + ret i64 %add +} + +define void @loadStore1(i64 noundef %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 9 +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) + %1 = load i64, ptr %0, align 8 + %add = add nsw i64 %1, 9 + store i64 %add, ptr %0, align 8 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 + +@ThreadLocalVarInit = thread_local(localexec) global i16 1, align 2 +@VarInit = local_unnamed_addr global i16 87, align 2 +@IThreadLocalVarInit = internal thread_local(localexec) global i16 1, align 2 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 + +define void @storeITLInit(i16 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: sth r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: sth r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit) 
+ store i16 %x, ptr %0, align 2 + ret void +} + +define void @storeTLInit(i16 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: sth r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: sth r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit) + store i16 %x, ptr %0, align 2 + ret void +} + +define signext i16 @loadITLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lha r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lha r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + ret i16 %1 +} + +define signext i16 @loadITLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r4, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: extsh r3, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r4, 
IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: extsh r3, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + %2 = load i16, ptr @VarInit, align 2 + %add = add i16 %2, %1 + ret i16 %add +} + +define signext i16 @loadTLInit() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lha r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lha r3, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + ret i16 %1 +} + +define signext i16 @loadTLInit2() { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # @VarInit +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: extsh r3, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r4, ThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r3, r4 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: 
extsh r3, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + %2 = load i16, ptr @VarInit, align 2 + %add = add i16 %2, %1 + ret i16 %add +} + +define void @loadStore1(i16 noundef signext %x) { +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lhz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: sth r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lhz r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 9 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: sth r3, IThreadLocalVarInit[TL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr +entry: + %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit) + %1 = load i16, ptr %0, align 2 + %add = add i16 %1, 9 + store i16 %add, ptr %0, align 2 + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll @@ -11,6 +11,13 @@ ; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE32-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \ +; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \ +; RUN: 
-mattr=+aix-small-local-exec-tls < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64 @TLInt = internal thread_local(localexec) global i32 0, align 4 @TLLongLong = internal thread_local(localexec) global i64 0, align 8 @@ -70,6 +77,20 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeInt: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, TLInt[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 0(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeInt: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, TLInt[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 0(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt) store i32 %x, ptr %0, align 4 @@ -132,6 +153,18 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeLongLong: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, TLLongLong[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, 0(r4) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeLongLong: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, TLLongLong[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, 0(r4) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong) store i64 %x, ptr %0, align 8 @@ -182,6 +215,18 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeDouble: 
+; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLDouble[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stxsdx f1, 0, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeDouble: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLDouble[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stxsdx f1, 0, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble) store double %x, ptr %0, align 8 @@ -232,6 +277,18 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeFloat: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLFloat[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfs f1, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeFloat: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLFloat[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfs f1, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat) store float %x, ptr %0, align 4 @@ -282,6 +339,18 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadInt: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLInt[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadInt: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLInt[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt) 
%1 = load i32, ptr %0, align 4 @@ -336,6 +405,22 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadLongLong: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLLongLong[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadLongLong: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLLongLong[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong) %1 = load i64, ptr %0, align 8 @@ -407,6 +492,30 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadDouble: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLDouble[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfd f0, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: # kill: def $f1 killed $f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: xscvdpsxws f0, f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r1, -12 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfiwx f0, 0, r3 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwz r3, -12(r1) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadDouble: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLDouble[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfd f0, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: # kill: def $f1 killed $f0 +; 
SMALL-LOCAL-EXEC-LARGECM64-NEXT: xscvdpsxws f0, f0 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r1, -12 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfiwx f0, 0, r3 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwz r3, -12(r1) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble) %1 = load double, ptr %0, align 8 @@ -474,6 +583,26 @@ ; LARGE32-O0-NEXT: lwz r0, 8(r1) ; LARGE32-O0-NEXT: mtlr r0 ; LARGE32-O0-NEXT: blr +; +; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadFloat: +; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, TLFloat[UL]@le(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lfs f0, 0(r3) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: fctiwz f0, f0 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stfd f0, -8(r1) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lwa r3, -4(r1) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr +; +; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadFloat: +; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, TLFloat[UL]@le(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lfs f0, 0(r3) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: fctiwz f0, f0 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stfd f0, -8(r1) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lwa r3, -4(r1) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 32 +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat) %1 = load float, ptr %0, align 4