diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,6 +16,7 @@ #include "PPC.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" +#include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" @@ -7436,6 +7437,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); + bool IsAIX = Subtarget->isAIXABI(); while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. @@ -7501,7 +7503,15 @@ unsigned Flags = 0; bool ReplaceFlags = true; + bool FoldADDTLS = false; + int Offset = N->getConstantOperandVal(FirstOp); + // For TLS local-exec accesses on AIX 64-bit, we emit an ADD8TLS node to + // add the result of loading the variable offset to the thread pointer + // (X13). We can optimize this add, followed by any D-Form memory operation, + // as long as the memory operation's immediate offset is 0, into an X-Form + // load/store, and remove the ADD8TLS node completely. + // // When the feeding operation is an add-immediate of some sort, // determine whether we need to add relocation information to the // target flags on the immediate operand when we fold it into the @@ -7515,6 +7525,28 @@ switch (Base.getMachineOpcode()) { default: continue; + case PPC::ADD8TLS: { + // The optimization for D-Form load/store fed by an ADD8TLS is only + // available on AIX. + if (!IsAIX) + continue; + // Base is the ADD8TLS operation. The first operand in this instruction + // should be the thread pointer (X13), so we do not do this optimization + // if we do not have X13. + RegisterSDNode *AddFirstOpReg = + dyn_cast_or_null(Base.getOperand(0).getNode()); + if (!AddFirstOpReg) + continue; + if (AddFirstOpReg->getReg() != PPC::X13) + continue; + // The optimization to convert the D-Form load/store into its X-Form + // counterpart should only occur if the immediate offset is 0. + if (Offset != 0) + continue; + ReplaceFlags = false; + FoldADDTLS = true; + break; + } case PPC::ADDI8: case PPC::ADDI: // In some cases (such as TLS) the relocation information @@ -7540,14 +7572,20 @@ break; } - SDValue ImmOpnd = Base.getOperand(1); + // If Base is an ADDI* instruction, then this is the immediate operand of + // the addi instruction. + // If Base is an ADD8TLS instruction, this will just represent the second + // operand of an add instruction (which would be the variable offset of the + // TLS variable that was loaded from an LDtoc instruction). + SDValue SecondAddOperand = Base.getOperand(1); // On PPC64, the TOC base pointer is guaranteed by the ABI only to have // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, // we might have needed different @ha relocation values for the offset // pointers). int MaxDisplacement = 7; - if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { + if (GlobalAddressSDNode *GA = + dyn_cast(SecondAddOperand)) { const GlobalValue *GV = GA->getGlobal(); Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement); @@ -7556,7 +7594,6 @@ bool UpdateHBase = false; SDValue HBase = Base.getOperand(0); - int Offset = N->getConstantOperandVal(FirstOp); if (ReplaceFlags) { if (Offset < 0 || Offset > MaxDisplacement) { // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only @@ -7573,7 +7610,7 @@ continue; SDValue HImmOpnd = HBase.getOperand(1); - if (HImmOpnd != ImmOpnd) + if (HImmOpnd != SecondAddOperand) continue; UpdateHBase = true; @@ -7584,7 +7621,7 @@ // 2. If the addend is a constant, then it can be combined with a // non-zero offset, but only if the result meets the encoding // requirements. - if (auto *C = dyn_cast(ImmOpnd)) { + if (auto *C = dyn_cast(SecondAddOperand)) { Offset += C->getSExtValue(); if (RequiresMod4Offset && (Offset % 4) != 0) @@ -7593,8 +7630,8 @@ if (!isInt<16>(Offset)) continue; - ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), - ImmOpnd.getValueType()); + SecondAddOperand = CurDAG->getTargetConstant( + Offset, SDLoc(SecondAddOperand), SecondAddOperand.getValueType()); } else if (Offset != 0) { continue; } @@ -7604,7 +7641,13 @@ // immediate and substitute them into the load or store. If // needed, update the target flags for the immediate operand to // reflect the necessary relocation information. - LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + // + // Or, optimize an ADD8TLS (on AIX) feeding into a D-Form load/store + // into an X-Form load/store. + LLVM_DEBUG(dbgs() << "Folding add"); + if (!IsAIX) + LLVM_DEBUG(dbgs() << "-immediate"); + LLVM_DEBUG(dbgs() << " into mem-op:\nBase: "); LLVM_DEBUG(Base->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nN: "); LLVM_DEBUG(N->dump(CurDAG)); @@ -7613,7 +7656,8 @@ // If the relocation information isn't already present on the // immediate operand, add it now. if (ReplaceFlags) { - if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { + if (GlobalAddressSDNode *GA = + dyn_cast(SecondAddOperand)) { SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); @@ -7623,25 +7667,42 @@ LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } - ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); + SecondAddOperand = + CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); } else if (ConstantPoolSDNode *CP = - dyn_cast(ImmOpnd)) { + dyn_cast(SecondAddOperand)) { const Constant *C = CP->getConstVal(); - ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(), - Offset, Flags); + SecondAddOperand = CurDAG->getTargetConstantPool( + C, MVT::i64, CP->getAlign(), Offset, Flags); } } - if (FirstOp == 1) // Store - (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, - Base.getOperand(0), N->getOperand(3)); - else // Load - (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), - N->getOperand(2)); + const PPCRegisterInfo *TRI = Subtarget->getRegisterInfo(); + // The X-Form load/store opcode of the corresponding D-Form load/store + // opcode is needed if we are transforming a D-Form load into its X-Form + // counterpart when optimizing away the ADD8TLS node. + unsigned XFormMemOpOpc = TRI->getMappedIdxOpcForImmOpc(StorageOpcode); + if (FirstOp == 1) { // Store + if (IsAIX && FoldADDTLS) + (void)CurDAG->SelectNodeTo(N, XFormMemOpOpc, N->getValueType(0), + N->getOperand(0), SecondAddOperand, + Base.getOperand(0)); + else + (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), SecondAddOperand, + Base.getOperand(0), N->getOperand(3)); + } else { // Load + if (IsAIX && FoldADDTLS) + (void)CurDAG->SelectNodeTo(N, XFormMemOpOpc, N->getValueType(0), + SecondAddOperand, Base.getOperand(0), + N->getOperand(2)); + else + (void)CurDAG->UpdateNodeOperands(N, SecondAddOperand, + Base.getOperand(0), N->getOperand(2)); + } if (UpdateHBase) (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), - ImmOpnd); + SecondAddOperand); // The add-immediate may now be dead, in which case remove it. if (Base.getNode()->use_empty()) diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 @ThreadLocalVarInit = thread_local(localexec) global double 0x4021947AE147AE14, align 8 @VarInit = global double 8.787000e+01, align 8 @@ -17,17 +23,30 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) store double %x, ptr %0, align 8 @@ -38,17 +57,30 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) store double %x, ptr %0, align 8 @@ -59,17 +91,30 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) store double %x, ptr %0, align 8 @@ -80,17 +125,30 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) store double %x, ptr %0, align 8 @@ -101,17 +159,30 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -123,9 +194,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r3, r13 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -135,11 +205,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r3, r13 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -152,17 +243,30 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -174,9 +278,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r3, r13 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -186,11 +289,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r3, r13 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -203,17 +327,30 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -225,9 +362,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r3, r13 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -237,11 +373,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r3, r13 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load double, ptr %0, align 8 @@ -254,17 +411,30 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load double, ptr %0, align 8 @@ -276,9 +446,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r3, r13 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -288,11 +457,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r3, r13 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lxsdx f0, 0, r3 +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lxsdx f1, 0, r3 +; SMALL64-O0-NEXT: xsadddp f1, f0, f1 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lxsdx f0, 0, r3 +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lxsdx f1, 0, r3 +; LARGE64-O0-NEXT: xsadddp f1, f0, f1 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load double, ptr %0, align 8 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 @ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4 @VarInit = global i32 87, align 4 @@ -17,17 +23,32 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r4, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C0@u(r2) ; LARGE64-NEXT: ld r4, L..C0@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r4, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C0@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) store i32 %x, ptr %0, align 4 @@ -38,17 +59,32 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r4, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C1@u(r2) ; LARGE64-NEXT: ld r4, L..C1@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r4, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C1@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) store i32 %x, ptr %0, align 4 @@ -59,17 +95,32 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r4, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C2@u(r2) ; LARGE64-NEXT: ld r4, L..C2@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r4, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C2@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) store i32 %x, ptr %0, align 4 @@ -80,17 +131,32 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r4, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C3@u(r2) ; LARGE64-NEXT: ld r4, L..C3@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r4, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C3@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) store i32 %x, ptr %0, align 4 @@ -101,17 +167,30 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -123,9 +202,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r3, r13 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -136,12 +214,35 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r3, r13 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -154,17 +255,30 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -176,9 +290,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r3, r13 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -189,12 +302,35 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r3, r13 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -207,17 +343,30 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -229,9 +378,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r3, r13 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -242,12 +390,35 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r3, r13 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit) %1 = load i32, ptr %0, align 4 @@ -260,17 +431,30 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwa r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwa r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load i32, ptr %0, align 4 @@ -282,9 +466,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r3, r13 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -295,12 +478,35 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r3, r13 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: extsw r3, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: extsw r3, r3 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) %1 = load i32, ptr %0, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ ; RUN: | FileCheck %s --check-prefix=LARGE64 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 @ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8 @VarInit = global i64 87, align 8 @@ -17,17 +23,30 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r4, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C0@u(r2) ; LARGE64-NEXT: ld r4, L..C0@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r4, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C0@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) store i64 %x, ptr %0, align 8 @@ -38,17 +57,30 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r4, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C1@u(r2) ; LARGE64-NEXT: ld r4, L..C1@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r4, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C1@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) store i64 %x, ptr %0, align 8 @@ -59,17 +91,30 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r4, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C2@u(r2) ; LARGE64-NEXT: ld r4, L..C2@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r4, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C2@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) store i64 %x, ptr %0, align 8 @@ -80,17 +125,30 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r4, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C3@u(r2) ; LARGE64-NEXT: ld r4, L..C3@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r4, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: storeTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C3@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) store i64 %x, ptr %0, align 8 @@ -101,17 +159,30 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -123,9 +194,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r3, r13 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -135,11 +205,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r3, r13 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -152,17 +243,30 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -174,9 +278,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r3, r13 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -186,11 +289,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r3, r13 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadITLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadITLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -203,17 +327,30 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -225,9 +362,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r3, r13 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -237,11 +373,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r3, r13 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLUninit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLUninit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit) %1 = load i64, ptr %0, align 8 @@ -254,17 +411,30 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r3, r13 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r3, r13 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load i64, ptr %0, align 8 @@ -276,9 +446,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r3, r13 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -288,11 +457,32 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r3, r13 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr +; +; SMALL64-O0-LABEL: loadTLInit2: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r4, 0(r3) +; SMALL64-O0-NEXT: ld r3, L..C4(r2) # @VarInit +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: add r3, r3, r4 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadTLInit2: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r4, 0(r3) +; LARGE64-O0-NEXT: addis r3, L..C4@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C4@l(r3) +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: add r3, r3, r4 +; LARGE64-O0-NEXT: blr entry: %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) %1 = load i64, ptr %0, align 8 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll @@ -48,7 +48,7 @@ ; RELOC-NEXT: Type: R_TOCL (0x31) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x22 +; RELOC-NEXT: Virtual Address: 0x12 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (17) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -56,7 +56,7 @@ ; RELOC-NEXT: Type: R_TOCU (0x30) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x26 +; RELOC-NEXT: Virtual Address: 0x16 ; RELOC-NEXT: Symbol: VarInit (19) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -64,7 +64,7 @@ ; RELOC-NEXT: Type: R_TOCU (0x30) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x2A +; RELOC-NEXT: Virtual Address: 0x1A ; RELOC-NEXT: Symbol: ThreadLocalVarInit (17) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -72,7 +72,7 @@ ; RELOC-NEXT: Type: R_TOCL (0x31) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x2E +; RELOC-NEXT: Virtual Address: 0x1E ; RELOC-NEXT: Symbol: VarInit (19) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -82,7 +82,7 @@ ; RELOC-NEXT: } ; RELOC-NEXT: Section (index: 2) .data { ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x50 +; RELOC-NEXT: Virtual Address: 0x38 ; RELOC-NEXT: Symbol: .storeITLUninit (3) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -90,7 +90,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x58 +; RELOC-NEXT: Virtual Address: 0x40 ; RELOC-NEXT: Symbol: TOC (13) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -98,7 +98,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x68 +; RELOC-NEXT: Virtual Address: 0x50 ; RELOC-NEXT: Symbol: .loadTLInit (5) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -106,7 +106,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x70 +; RELOC-NEXT: Virtual Address: 0x58 ; RELOC-NEXT: Symbol: TOC (13) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -114,7 +114,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x80 +; RELOC-NEXT: Virtual Address: 0x68 ; RELOC-NEXT: Symbol: IThreadLocalVarUninit (23) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -122,7 +122,7 @@ ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x88 +; RELOC-NEXT: Virtual Address: 0x70 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (21) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -130,7 +130,7 @@ ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x90 +; RELOC-NEXT: Virtual Address: 0x78 ; RELOC-NEXT: Symbol: VarInit (7) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -165,7 +165,7 @@ ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { ; SYM-NEXT: Index: 2 -; SYM-NEXT: SectionLen: 68 +; SYM-NEXT: SectionLen: 48 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 5 @@ -196,7 +196,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 5 ; SYM-NEXT: Name: .loadTLInit -; SYM-NEXT: Value (RelocatableAddress): 0x20 +; SYM-NEXT: Value (RelocatableAddress): 0x10 ; SYM-NEXT: Section: .text ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -215,7 +215,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 7 ; SYM-NEXT: Name: VarInit -; SYM-NEXT: Value (RelocatableAddress): 0x48 +; SYM-NEXT: Value (RelocatableAddress): 0x30 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -234,7 +234,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 9 ; SYM-NEXT: Name: storeITLUninit -; SYM-NEXT: Value (RelocatableAddress): 0x50 +; SYM-NEXT: Value (RelocatableAddress): 0x38 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -253,7 +253,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 11 ; SYM-NEXT: Name: loadTLInit -; SYM-NEXT: Value (RelocatableAddress): 0x68 +; SYM-NEXT: Value (RelocatableAddress): 0x50 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -272,7 +272,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 13 ; SYM-NEXT: Name: TOC -; SYM-NEXT: Value (RelocatableAddress): 0x80 +; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -291,7 +291,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 15 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x80 +; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -310,7 +310,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 17 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value (RelocatableAddress): 0x88 +; SYM-NEXT: Value (RelocatableAddress): 0x70 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -329,7 +329,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 19 ; SYM-NEXT: Name: VarInit -; SYM-NEXT: Value (RelocatableAddress): 0x90 +; SYM-NEXT: Value (RelocatableAddress): 0x78 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -392,10 +392,9 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 15) IThreadLocalVarUninit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 15) IThreadLocalVarUninit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 4, 13, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 3, 0(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stdx 3, 4, 13 ; DIS-NEXT: blr -; DIS: 0000000000000020 (idx: 5) .loadTLInit: +; DIS: 0000000000000010 (idx: 5) .loadTLInit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 17) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 4, 2, 0 @@ -404,42 +403,41 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 16(4) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) VarInit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ldx 3, 3, 13 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 0(3) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 4, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr ; DIS: Disassembly of section .data: -; DIS: 0000000000000048 (idx: 7) VarInit[RW]: -; DIS-NEXT: 48: 00 00 00 00 -; DIS-NEXT: 4c: 00 00 00 57 -; DIS: 0000000000000050 (idx: 9) storeITLUninit[DS]: +; DIS: 0000000000000030 (idx: 7) VarInit[RW]: +; DIS-NEXT: 30: 00 00 00 00 +; DIS-NEXT: 34: 00 00 00 57 +; DIS: 0000000000000038 (idx: 9) storeITLUninit[DS]: +; DIS-NEXT: 38: 00 00 00 00 +; DIS-NEXT: 0000000000000038: R_POS (idx: 3) .storeITLUninit +; DIS-NEXT: 3c: 00 00 00 00 +; DIS-NEXT: 40: 00 00 00 00 +; DIS-NEXT: 0000000000000040: R_POS (idx: 13) TOC[TC0] +; DIS-NEXT: 44: 00 00 00 68 +; DIS: 0000000000000050 (idx: 11) loadTLInit[DS]: ; DIS-NEXT: 50: 00 00 00 00 -; DIS-NEXT: 0000000000000050: R_POS (idx: 3) .storeITLUninit -; DIS-NEXT: 54: 00 00 00 00 +; DIS-NEXT: 0000000000000050: R_POS (idx: 5) .loadTLInit +; DIS-NEXT: 54: 00 00 00 10 ; DIS-NEXT: 58: 00 00 00 00 ; DIS-NEXT: 0000000000000058: R_POS (idx: 13) TOC[TC0] -; DIS-NEXT: 5c: 00 00 00 80 -; DIS: 0000000000000068 (idx: 11) loadTLInit[DS]: +; DIS-NEXT: 5c: 00 00 00 68 +; DIS: 0000000000000068 (idx: 15) IThreadLocalVarUninit[TE]: ; DIS-NEXT: 68: 00 00 00 00 -; DIS-NEXT: 0000000000000068: R_POS (idx: 5) .loadTLInit -; DIS-NEXT: 6c: 00 00 00 20 +; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 23) IThreadLocalVarUninit[UL] +; DIS-NEXT: 6c: 00 00 00 00 +; DIS: 0000000000000070 (idx: 17) ThreadLocalVarInit[TE]: ; DIS-NEXT: 70: 00 00 00 00 -; DIS-NEXT: 0000000000000070: R_POS (idx: 13) TOC[TC0] -; DIS-NEXT: 74: 00 00 00 80 -; DIS: 0000000000000080 (idx: 15) IThreadLocalVarUninit[TE]: -; DIS-NEXT: 80: 00 00 00 00 -; DIS-NEXT: 0000000000000080: R_TLS_LE (idx: 23) IThreadLocalVarUninit[UL] -; DIS-NEXT: 84: 00 00 00 00 -; DIS: 0000000000000088 (idx: 17) ThreadLocalVarInit[TE]: -; DIS-NEXT: 88: 00 00 00 00 -; DIS-NEXT: 0000000000000088: R_TLS_LE (idx: 21) ThreadLocalVarInit[TL] -; DIS-NEXT: 8c: 00 00 00 00 -; DIS: 0000000000000090 (idx: 19) VarInit[TE]: -; DIS-NEXT: 90: 00 00 00 00 -; DIS-NEXT: 0000000000000090: R_POS (idx: 7) VarInit[RW] -; DIS-NEXT: 94: 00 00 00 48 +; DIS-NEXT: 0000000000000070: R_TLS_LE (idx: 21) ThreadLocalVarInit[TL] +; DIS-NEXT: 74: 00 00 00 00 +; DIS: 0000000000000078 (idx: 19) VarInit[TE]: +; DIS-NEXT: 78: 00 00 00 00 +; DIS-NEXT: 0000000000000078: R_POS (idx: 7) VarInit[RW] +; DIS-NEXT: 7c: 00 00 00 30 ; DIS: Disassembly of section .tdata: ; DIS: 0000000000000000 (idx: 21) ThreadLocalVarInit[TL]: diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll @@ -58,7 +58,7 @@ ; RELOC-NEXT: } ; RELOC-NEXT: Section (index: 2) .data { ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x38 +; RELOC-NEXT: Virtual Address: 0x30 ; RELOC-NEXT: Symbol: .storeITLUninit (3) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -66,7 +66,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x40 +; RELOC-NEXT: Virtual Address: 0x38 ; RELOC-NEXT: Symbol: TOC (15) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -74,7 +74,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x50 +; RELOC-NEXT: Virtual Address: 0x48 ; RELOC-NEXT: Symbol: .loadTLInit (5) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -82,7 +82,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x58 +; RELOC-NEXT: Virtual Address: 0x50 ; RELOC-NEXT: Symbol: TOC (15) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -90,7 +90,7 @@ ; RELOC-NEXT: Type: R_POS (0x0) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x68 +; RELOC-NEXT: Virtual Address: 0x60 ; RELOC-NEXT: Symbol: IThreadLocalVarUninit (27) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -98,7 +98,7 @@ ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x70 +; RELOC-NEXT: Virtual Address: 0x68 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (25) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -106,7 +106,7 @@ ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { -; RELOC-NEXT: Virtual Address: 0x78 +; RELOC-NEXT: Virtual Address: 0x70 ; RELOC-NEXT: Symbol: VarInit (9) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -141,7 +141,7 @@ ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { ; SYM-NEXT: Index: 2 -; SYM-NEXT: SectionLen: 48 +; SYM-NEXT: SectionLen: 44 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 5 @@ -191,7 +191,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 7 ; SYM-NEXT: Name: .data -; SYM-NEXT: Value (RelocatableAddress): 0x30 +; SYM-NEXT: Value (RelocatableAddress): 0x2C ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -210,7 +210,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 9 ; SYM-NEXT: Name: VarInit -; SYM-NEXT: Value (RelocatableAddress): 0x30 +; SYM-NEXT: Value (RelocatableAddress): 0x2C ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -229,7 +229,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 11 ; SYM-NEXT: Name: storeITLUninit -; SYM-NEXT: Value (RelocatableAddress): 0x38 +; SYM-NEXT: Value (RelocatableAddress): 0x30 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -248,7 +248,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 13 ; SYM-NEXT: Name: loadTLInit -; SYM-NEXT: Value (RelocatableAddress): 0x50 +; SYM-NEXT: Value (RelocatableAddress): 0x48 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_EXT (0x2) @@ -267,7 +267,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 15 ; SYM-NEXT: Name: TOC -; SYM-NEXT: Value (RelocatableAddress): 0x68 +; SYM-NEXT: Value (RelocatableAddress): 0x60 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -286,7 +286,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 17 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x68 +; SYM-NEXT: Value (RelocatableAddress): 0x60 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -305,7 +305,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 19 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value (RelocatableAddress): 0x70 +; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -324,7 +324,7 @@ ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 21 ; SYM-NEXT: Name: VarInit -; SYM-NEXT: Value (RelocatableAddress): 0x78 +; SYM-NEXT: Value (RelocatableAddress): 0x70 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -404,47 +404,45 @@ ; DIS: 0000000000000000 (idx: 3) .storeITLUninit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 17) IThreadLocalVarUninit[TC] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 4, 13, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 0(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stwx 3, 4, 13 ; DIS-NEXT: blr ; DIS: 0000000000000010 (idx: 5) .loadTLInit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 8(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 19) ThreadLocalVarInit[TC] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 16(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 21) VarInit[TC] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwzx 3, 3, 13 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 0(4) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 4, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} extsw 3, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr ; DIS: Disassembly of section .data: -; DIS: 0000000000000030 (idx: 9) VarInit: -; DIS-NEXT: 30: 00 00 00 57 -; DIS: 0000000000000038 (idx: 11) storeITLUninit[DS]: -; DIS-NEXT: 8: 00 00 00 00 -; DIS-NEXT: 0000000000000038: R_POS (idx: 3) .storeITLUninit -; DIS-NEXT: 3c: 00 00 00 00 -; DIS-NEXT: 40: 00 00 00 00 -; DIS-NEXT: 0000000000000040: R_POS (idx: 15) TOC[TC0] -; DIS-NEXT: 44: 00 00 00 68 -; DIS: 0000000000000050 (idx: 13) loadTLInit[DS]: +; DIS: 000000000000002c (idx: 9) VarInit: +; DIS-NEXT: 2c: 00 00 00 57 +; DIS: 0000000000000030 (idx: 11) storeITLUninit[DS]: +; DIS-NEXT: 30: 00 00 00 00 +; DIS-NEXT: 0000000000000030: R_POS (idx: 3) .storeITLUninit +; DIS-NEXT: 34: 00 00 00 00 +; DIS-NEXT: 38: 00 00 00 00 +; DIS-NEXT: 0000000000000038: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: 3c: 00 00 00 60 +; DIS: 0000000000000048 (idx: 13) loadTLInit[DS]: +; DIS-NEXT: 48: 00 00 00 00 +; DIS-NEXT: 0000000000000048: R_POS (idx: 5) .loadTLInit +; DIS-NEXT: 4c: 00 00 00 10 ; DIS-NEXT: 50: 00 00 00 00 -; DIS-NEXT: 0000000000000050: R_POS (idx: 5) .loadTLInit -; DIS-NEXT: 54: 00 00 00 10 -; DIS-NEXT: 58: 00 00 00 00 -; DIS-NEXT: 0000000000000058: R_POS (idx: 15) TOC[TC0] -; DIS-NEXT: 5c: 00 00 00 68 -; DIS: 0000000000000068 (idx: 17) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 0000000000000050: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: 54: 00 00 00 60 +; DIS: 0000000000000060 (idx: 17) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 60: 00 00 00 00 +; DIS-NEXT: 0000000000000060: R_TLS_LE (idx: 27) IThreadLocalVarUninit[UL] +; DIS: 0000000000000068 (idx: 19) ThreadLocalVarInit[TC]: ; DIS-NEXT: 68: 00 00 00 00 -; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 27) IThreadLocalVarUninit[UL] -; DIS: 0000000000000070 (idx: 19) ThreadLocalVarInit[TC]: +; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 25) ThreadLocalVarInit +; DIS: 0000000000000070 (idx: 21) VarInit[TC]: ; DIS-NEXT: 70: 00 00 00 00 -; DIS-NEXT: 0000000000000070: R_TLS_LE (idx: 25) ThreadLocalVarInit -; DIS: 0000000000000078 (idx: 21) VarInit[TC]: -; DIS-NEXT: 78: 00 00 00 00 -; DIS-NEXT: 0000000000000078: R_POS (idx: 9) VarInit +; DIS-NEXT: 0000000000000070: R_POS (idx: 9) VarInit ; DIS: Disassembly of section .tdata: ; DIS: 0000000000000000 (idx: 25) ThreadLocalVarInit: