diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -704,16 +704,87 @@ return false; } -bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { - SDValue Base = ST->getBasePtr(); +// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS +// instruction use the thread pointer. +static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG) { + assert( + Base.getOpcode() == PPCISD::ADD_TLS && + "Only expecting the ADD_TLS instruction to acquire the thread pointer!"); + const PPCSubtarget &Subtarget = + CurDAG->getMachineFunction().getSubtarget(); + + // Account for when ADD_TLS is used for the initial-exec TLS model on Linux. + // + // Although ADD_TLS does not explicitly use the thread pointer + // register when LD_GOT_TPREL_L is one of its operands, the LD_GOT_TPREL_L + // instruction will have a relocation specifier, @got@tprel, that is used to + // generate a GOT entry. The linker replaces this entry with an offset for a + // thread local variable, which will be relative to the thread pointer. + if (Base.getOperand(0).getOpcode() == PPCISD::LD_GOT_TPREL_L) + return true; + // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR + // node is produced instead to represent the aforementioned situation. + LoadSDNode *LD = dyn_cast(Base.getOperand(0)); + if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR) + return true; + + // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand + // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer, + // later returning it into R3. + if (Base.getOperand(0).getOpcode() == PPCISD::GET_TPOINTER) + return true; + + // The ADD_TLS node is explicitly acquiring the thread pointer (X13/R13). 
+ RegisterSDNode *AddFirstOpReg = + dyn_cast_or_null(Base.getOperand(0).getNode()); + if (AddFirstOpReg) + if (AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister()) + return true; + + return false; +} + +// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS +// instruction is present. An ADD_TLS instruction, followed by a D-Form memory +// operation, can be optimized to use an X-Form load or store, allowing the +// ADD_TLS node to be removed completely. +static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base, + uint64_t SrcValueOffset, + SDValue Offset) { + + // Do not do this transformation at -O0. + if (CurDAG->getTarget().getOptLevel() == CodeGenOpt::None) + return false; + + // In order to perform this optimization inside tryTLSXForm[Load|Store], + // Base is expected to be an ADD_TLS node. if (Base.getOpcode() != PPCISD::ADD_TLS) return false; - SDValue Offset = ST->getOffset(); - if (!Offset.isUndef()) + + // The optimization to convert the D-Form load/store into its X-Form + // counterpart should only occur if the source value offset of the load/ + // store is 0. This also means that the offset should always be undefined. + if (SrcValueOffset != 0 || !Offset.isUndef()) return false; + if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) return false; + // Does the Base pointer of the load/store use the thread pointer? + // If the thread pointer is not used as one of the operands of ADD_TLS, + // then this optimization is not valid. 
+ if (!isThreadPointerAcquisitionNode(Base, CurDAG)) + return false; + + return true; +} + +bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { + SDValue Base = ST->getBasePtr(); + if (!canOptimizeTLSDFormToXForm(CurDAG, Base, ST->getSrcValueOffset(), + ST->getOffset())) + return false; + SDLoc dl(ST); EVT MemVT = ST->getMemoryVT(); EVT RegVT = ST->getValue().getValueType(); @@ -759,12 +830,8 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { SDValue Base = LD->getBasePtr(); - if (Base.getOpcode() != PPCISD::ADD_TLS) - return false; - SDValue Offset = LD->getOffset(); - if (!Offset.isUndef()) - return false; - if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) + if (!canOptimizeTLSDFormToXForm(CurDAG, Base, LD->getSrcValueOffset(), + LD->getOffset())) return false; SDLoc dl(LD); @@ -5426,9 +5493,10 @@ } case ISD::STORE: { - // Change TLS initial-exec D-form stores to X-form stores. + // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to + // X-form stores. StoreSDNode *ST = cast(N); - if (EnableTLSOpt && Subtarget->isELFv2ABI() && + if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) && ST->getAddressingMode() != ISD::PRE_INC) if (tryTLSXFormStore(ST)) return; @@ -5441,8 +5509,9 @@ // Normal loads are handled by code generated from the .td file. if (LD->getAddressingMode() != ISD::PRE_INC) { - // Change TLS initial-exec D-form loads to X-form loads. - if (EnableTLSOpt && Subtarget->isELFv2ABI()) + // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to + // X-form loads. + if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI())) if (tryTLSXFormLoad(LD)) return; break; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -276,6 +276,12 @@ return IsPPC64 ? 
PPC::X2 : PPC::R2; } + MCRegister getThreadPointerRegister() const { + assert((is64BitELFABI() || isAIXABI()) && + "Should only be called for targets with a thread pointer register."); + return IsPPC64 ? PPC::X13 : PPC::R13; + } + MCRegister getStackPointerRegister() const { return IsPPC64 ? PPC::X1 : PPC::R1; } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-O0.ll @@ -0,0 +1,525 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE64-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefix=SMALL32-O0 +; RUN: llc -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE32-O0 + +@TLInt = internal thread_local(localexec) global i32 0, align 4 +@TLLongLong = internal thread_local(localexec) global i64 0, align 8 +@TLDouble = internal thread_local(localexec) global double 0.000000e+00, align 8 +@TLFloat = internal thread_local(localexec) global float 0.000000e+00, align 4 +declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1 + +define void @storeInt(i32 noundef %x) { +; SMALL64-O0-LABEL: storeInt: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @TLInt +; SMALL64-O0-NEXT: add r4, r13, r4 +; 
SMALL64-O0-NEXT: stw r3, 0(r4) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeInt: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: addis r4, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C0@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: stw r3, 0(r4) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeInt: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: lwz r5, L..C0(r2) # target-flags(ppc-tprel) @TLInt +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r5 +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeInt: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r5, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r5 +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt) + store i32 %x, ptr %0, align 4 + ret void +} + +define void @storeLongLong(i64 noundef %x) { +; SMALL64-O0-LABEL: storeLongLong: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @TLLongLong +; SMALL64-O0-NEXT: add r4, r13, r4 +; SMALL64-O0-NEXT: std r3, 0(r4) +; 
SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeLongLong: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r4, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r4, L..C1@l(r4) +; LARGE64-O0-NEXT: add r4, r13, r4 +; LARGE64-O0-NEXT: std r3, 0(r4) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeLongLong: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: mr r5, r4 +; SMALL32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; SMALL32-O0-NEXT: # kill: def $r4 killed $r5 +; SMALL32-O0-NEXT: lwz r6, L..C1(r2) # target-flags(ppc-tprel) @TLLongLong +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: mr r4, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; SMALL32-O0-NEXT: add r4, r4, r6 +; SMALL32-O0-NEXT: stw r5, 4(r4) +; SMALL32-O0-NEXT: stw r3, 0(r4) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeLongLong: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: mr r5, r4 +; LARGE32-O0-NEXT: stw r3, 28(r1) # 4-byte Folded Spill +; LARGE32-O0-NEXT: # kill: def $r4 killed $r5 +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r6, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: mr r4, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) # 4-byte Folded Reload +; LARGE32-O0-NEXT: add r4, r4, r6 +; LARGE32-O0-NEXT: stw r5, 4(r4) +; LARGE32-O0-NEXT: stw r3, 0(r4) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong) + store i64 %x, ptr %0, align 8 + ret void +} + +define void @storeDouble(double noundef %x) { +; SMALL64-O0-LABEL: storeDouble: +; SMALL64-O0: # %bb.0: # 
%entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @TLDouble +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stxsdx f1, 0, r3 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeDouble: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: stxsdx f1, 0, r3 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeDouble: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @TLDouble +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: stfd f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeDouble: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfd f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble) + store double %x, ptr %0, align 8 + ret void +} + +define void @storeFloat(float noundef %x) { +; SMALL64-O0-LABEL: storeFloat: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @TLFloat +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: stfs f1, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: storeFloat: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; 
LARGE64-O0-NEXT: stfs f1, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: storeFloat: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @TLFloat +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: stfs f1, 0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: storeFloat: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: stfs f1, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat) + store float %x, ptr %0, align 4 + ret void +} + +define i32 @loadInt() { +; SMALL64-O0-LABEL: loadInt: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @TLInt +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lwz r3, 0(r3) +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadInt: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C0@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lwz r3, 0(r3) +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadInt: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @TLInt +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 
0(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadInt: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C0@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C0@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 0(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt) + %1 = load i32, ptr %0, align 4 + ret i32 %1 +} + +define i32 @loadLongLong() { +; SMALL64-O0-LABEL: loadLongLong: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @TLLongLong +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: ld r3, 0(r3) +; SMALL64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; SMALL64-O0-NEXT: clrldi r3, r3, 32 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadLongLong: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C1@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: ld r3, 0(r3) +; LARGE64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; LARGE64-O0-NEXT: clrldi r3, r3, 32 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadLongLong: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @TLLongLong +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lwz r3, 4(r3) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadLongLong: +; LARGE32-O0: # %bb.0: # 
%entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C1@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C1@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lwz r3, 4(r3) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong) + %1 = load i64, ptr %0, align 8 + %conv = trunc i64 %1 to i32 + ret i32 %conv +} + +define i32 @loadDouble() { +; SMALL64-O0-LABEL: loadDouble: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @TLDouble +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfd f0, 0(r3) +; SMALL64-O0-NEXT: # kill: def $f1 killed $f0 +; SMALL64-O0-NEXT: xscvdpsxws f0, f0 +; SMALL64-O0-NEXT: addi r3, r1, -12 +; SMALL64-O0-NEXT: stfiwx f0, 0, r3 +; SMALL64-O0-NEXT: lwz r3, -12(r1) +; SMALL64-O0-NEXT: clrldi r3, r3, 32 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadDouble: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C2@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfd f0, 0(r3) +; LARGE64-O0-NEXT: # kill: def $f1 killed $f0 +; LARGE64-O0-NEXT: xscvdpsxws f0, f0 +; LARGE64-O0-NEXT: addi r3, r1, -12 +; LARGE64-O0-NEXT: stfiwx f0, 0, r3 +; LARGE64-O0-NEXT: lwz r3, -12(r1) +; LARGE64-O0-NEXT: clrldi r3, r3, 32 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadDouble: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @TLDouble +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfd f0, 0(r3) +; SMALL32-O0-NEXT: xscvdpsxws f0, f0 +; SMALL32-O0-NEXT: addi r3, r1, 28 
+; SMALL32-O0-NEXT: stfiwx f0, 0, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadDouble: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C2@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C2@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfd f0, 0(r3) +; LARGE32-O0-NEXT: xscvdpsxws f0, f0 +; LARGE32-O0-NEXT: addi r3, r1, 28 +; LARGE32-O0-NEXT: stfiwx f0, 0, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr +entry: + %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble) + %1 = load double, ptr %0, align 8 + %conv = fptosi double %1 to i32 + ret i32 %conv +} + +define i32 @loadFloat() { +; SMALL64-O0-LABEL: loadFloat: +; SMALL64-O0: # %bb.0: # %entry +; SMALL64-O0-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @TLFloat +; SMALL64-O0-NEXT: add r3, r13, r3 +; SMALL64-O0-NEXT: lfs f0, 0(r3) +; SMALL64-O0-NEXT: fctiwz f0, f0 +; SMALL64-O0-NEXT: stfd f0, -8(r1) +; SMALL64-O0-NEXT: lwa r3, -4(r1) +; SMALL64-O0-NEXT: clrldi r3, r3, 32 +; SMALL64-O0-NEXT: blr +; +; LARGE64-O0-LABEL: loadFloat: +; LARGE64-O0: # %bb.0: # %entry +; LARGE64-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE64-O0-NEXT: ld r3, L..C3@l(r3) +; LARGE64-O0-NEXT: add r3, r13, r3 +; LARGE64-O0-NEXT: lfs f0, 0(r3) +; LARGE64-O0-NEXT: fctiwz f0, f0 +; LARGE64-O0-NEXT: stfd f0, -8(r1) +; LARGE64-O0-NEXT: lwa r3, -4(r1) +; LARGE64-O0-NEXT: clrldi r3, r3, 32 +; LARGE64-O0-NEXT: blr +; +; SMALL32-O0-LABEL: loadFloat: +; SMALL32-O0: # %bb.0: # %entry +; SMALL32-O0-NEXT: mflr r0 +; SMALL32-O0-NEXT: stwu r1, -32(r1) +; SMALL32-O0-NEXT: stw r0, 40(r1) +; SMALL32-O0-NEXT: lwz r4, L..C3(r2) 
# target-flags(ppc-tprel) @TLFloat +; SMALL32-O0-NEXT: bla .__get_tpointer[PR] +; SMALL32-O0-NEXT: add r3, r3, r4 +; SMALL32-O0-NEXT: lfs f0, 0(r3) +; SMALL32-O0-NEXT: xscvdpsxws f0, f0 +; SMALL32-O0-NEXT: addi r3, r1, 28 +; SMALL32-O0-NEXT: stfiwx f0, 0, r3 +; SMALL32-O0-NEXT: lwz r3, 28(r1) +; SMALL32-O0-NEXT: addi r1, r1, 32 +; SMALL32-O0-NEXT: lwz r0, 8(r1) +; SMALL32-O0-NEXT: mtlr r0 +; SMALL32-O0-NEXT: blr +; +; LARGE32-O0-LABEL: loadFloat: +; LARGE32-O0: # %bb.0: # %entry +; LARGE32-O0-NEXT: mflr r0 +; LARGE32-O0-NEXT: stwu r1, -32(r1) +; LARGE32-O0-NEXT: stw r0, 40(r1) +; LARGE32-O0-NEXT: addis r3, L..C3@u(r2) +; LARGE32-O0-NEXT: lwz r4, L..C3@l(r3) +; LARGE32-O0-NEXT: bla .__get_tpointer[PR] +; LARGE32-O0-NEXT: add r3, r3, r4 +; LARGE32-O0-NEXT: lfs f0, 0(r3) +; LARGE32-O0-NEXT: xscvdpsxws f0, f0 +; LARGE32-O0-NEXT: addi r3, r1, 28 +; LARGE32-O0-NEXT: stfiwx f0, 0, r3 +; LARGE32-O0-NEXT: lwz r3, 28(r1) +; LARGE32-O0-NEXT: addi r1, r1, 32 +; LARGE32-O0-NEXT: lwz r0, 8(r1) +; LARGE32-O0-NEXT: mtlr r0 +; LARGE32-O0-NEXT: blr +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat) + %1 = load float, ptr %0, align 4 + %conv = fptosi float %1 to i32 + ret i32 %conv +} + +; TOC Entry Checks. 
+ +; SMALL64-O0-LABEL: .toc +; SMALL64-O0-LABEL:L..C0: +; SMALL64-O0-NEXT: .tc TLInt[TC],TLInt[UL]@le +; SMALL64-O0-LABEL:L..C1: +; SMALL64-O0-NEXT: .tc TLLongLong[TC],TLLongLong[UL]@le +; SMALL64-O0-LABEL:L..C2: +; SMALL64-O0-NEXT: .tc TLDouble[TC],TLDouble[UL]@le +; SMALL64-O0-LABEL:L..C3: +; SMALL64-O0-NEXT: .tc TLFloat[TC],TLFloat[UL]@le + +; LARGE64-O0-LABEL: .toc +; LARGE64-O0-LABEL:L..C0: +; LARGE64-O0-NEXT: .tc TLInt[TE],TLInt[UL]@le +; LARGE64-O0-LABEL:L..C1: +; LARGE64-O0-NEXT: .tc TLLongLong[TE],TLLongLong[UL]@le +; LARGE64-O0-LABEL:L..C2: +; LARGE64-O0-NEXT: .tc TLDouble[TE],TLDouble[UL]@le +; LARGE64-O0-LABEL:L..C3: +; LARGE64-O0-NEXT: .tc TLFloat[TE],TLFloat[UL]@le + +; SMALL32-O0-LABEL: .toc +; SMALL32-O0-LABEL:L..C0: +; SMALL32-O0-NEXT: .tc TLInt[TC],TLInt[UL]@le +; SMALL32-O0-LABEL:L..C1: +; SMALL32-O0-NEXT: .tc TLLongLong[TC],TLLongLong[UL]@le +; SMALL32-O0-LABEL:L..C2: +; SMALL32-O0-NEXT: .tc TLDouble[TC],TLDouble[UL]@le +; SMALL32-O0-LABEL:L..C3: +; SMALL32-O0-NEXT: .tc TLFloat[TC],TLFloat[UL]@le + +; LARGE32-O0-LABEL: .toc +; LARGE32-O0-LABEL:L..C0: +; LARGE32-O0-NEXT: .tc TLInt[TE],TLInt[UL]@le +; LARGE32-O0-LABEL:L..C1: +; LARGE32-O0-NEXT: .tc TLLongLong[TE],TLLongLong[UL]@le +; LARGE32-O0-LABEL:L..C2: +; LARGE32-O0-NEXT: .tc TLDouble[TE],TLDouble[UL]@le +; LARGE32-O0-LABEL:L..C3: +; LARGE32-O0-NEXT: .tc TLFloat[TE],TLFloat[UL]@le + diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll @@ -23,16 +23,14 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) 
; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLUninit: @@ -42,8 +40,7 @@ ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: stfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -57,8 +54,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: stfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -73,16 +69,14 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLInit: @@ -92,8 +86,7 @@ ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: stfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -107,8 +100,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: 
stfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -123,16 +115,14 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLUninit: @@ -142,8 +132,7 @@ ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: stfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -157,8 +146,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: stfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -173,16 +161,14 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfd f1, 0(r3) +; SMALL64-NEXT: stfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfd f1, 0(r3) +; LARGE64-NEXT: stfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLInit: @@ -192,8 +178,7 @@ ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) 
@ThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfd f1, 0(r3) +; SMALL32-NEXT: stfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -207,8 +192,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfd f1, 0(r3) +; LARGE32-NEXT: stfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -223,16 +207,14 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLUninit: @@ -240,10 +222,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -257,8 +238,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: lfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -274,9 +254,8 @@ ; SMALL64: # %bb.0: # %entry ; 
SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -286,9 +265,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -298,11 +276,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f0, r3, r4 ; SMALL32-NEXT: lfd f1, 0(r5) -; SMALL32-NEXT: lfd f0, 0(r3) ; SMALL32-NEXT: xsadddp f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -317,8 +294,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: lfdx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfd f1, 0(r3) @@ -339,16 +315,14 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; 
LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLInit: @@ -356,10 +330,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -373,8 +346,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: lfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -390,9 +362,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -402,9 +373,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -414,11 +384,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f0, r3, r4 ; SMALL32-NEXT: lfd f1, 0(r5) -; SMALL32-NEXT: lfd f0, 0(r3) ; SMALL32-NEXT: xsadddp f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz 
r0, 8(r1) @@ -433,8 +402,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: lfdx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfd f1, 0(r3) @@ -455,16 +423,14 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLUninit: @@ -472,10 +438,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -489,8 +454,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: lfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -506,9 +470,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: 
xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -518,9 +481,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -530,11 +492,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f0, r3, r4 ; SMALL32-NEXT: lfd f1, 0(r5) -; SMALL32-NEXT: lfd f0, 0(r3) ; SMALL32-NEXT: xsadddp f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -549,8 +510,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: lfdx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfd f1, 0(r3) @@ -571,16 +531,14 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: lfdx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: lfdx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLInit: @@ -588,10 +546,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; 
SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -605,8 +562,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: lfdx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -622,9 +578,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfdx f0, r13, r3 ; SMALL64-NEXT: lfd f1, 0(r4) -; SMALL64-NEXT: lfd f0, 0(r3) ; SMALL64-NEXT: xsadddp f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -634,9 +589,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfdx f0, r13, r3 ; LARGE64-NEXT: lfd f1, 0(r4) -; LARGE64-NEXT: lfd f0, 0(r3) ; LARGE64-NEXT: xsadddp f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -646,11 +600,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfdx f0, r3, r4 ; SMALL32-NEXT: lfd f1, 0(r5) -; SMALL32-NEXT: lfd f0, 0(r3) ; SMALL32-NEXT: xsadddp f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -665,8 +618,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfd f0, 0(r3) +; LARGE32-NEXT: lfdx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; 
LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfd f1, 0(r3) diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll @@ -23,16 +23,14 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLUninit: @@ -42,8 +40,7 @@ ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: stfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -57,8 +54,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: stfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -73,16 +69,14 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; 
LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLInit: @@ -92,8 +86,7 @@ ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: stfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -107,8 +100,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: stfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -123,16 +115,14 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLUninit: @@ -142,8 +132,7 @@ ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: stfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -157,8 +146,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: stfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: 
lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -173,16 +161,14 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: stfs f1, 0(r3) +; SMALL64-NEXT: stfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: stfs f1, 0(r3) +; LARGE64-NEXT: stfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLInit: @@ -192,8 +178,7 @@ ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: stfs f1, 0(r3) +; SMALL32-NEXT: stfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -207,8 +192,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: stfs f1, 0(r3) +; LARGE32-NEXT: stfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -223,16 +207,14 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLUninit: @@ -240,10 +222,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) 
@IThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -257,8 +238,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: lfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -274,9 +254,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -286,9 +265,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -298,11 +276,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f0, r3, r4 ; SMALL32-NEXT: lfs f1, 0(r5) -; SMALL32-NEXT: lfs f0, 0(r3) ; SMALL32-NEXT: fadds f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -317,8 +294,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; 
LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: lfsx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfs f1, 0(r3) @@ -339,16 +315,14 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLInit: @@ -356,10 +330,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -373,8 +346,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: lfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -390,9 +362,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -402,9 +373,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: 
add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -414,11 +384,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f0, r3, r4 ; SMALL32-NEXT: lfs f1, 0(r5) -; SMALL32-NEXT: lfs f0, 0(r3) ; SMALL32-NEXT: fadds f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -433,8 +402,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: lfsx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfs f1, 0(r3) @@ -455,16 +423,14 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLUninit: @@ -472,10 +438,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: 
lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -489,8 +454,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: lfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -506,9 +470,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -518,9 +481,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -530,11 +492,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f0, r3, r4 ; SMALL32-NEXT: lfs f1, 0(r5) -; SMALL32-NEXT: lfs f0, 0(r3) ; SMALL32-NEXT: fadds f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -549,8 +510,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: lfsx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfs f1, 0(r3) @@ -571,16 +531,14 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # 
target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lfs f1, 0(r3) +; SMALL64-NEXT: lfsx f1, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lfs f1, 0(r3) +; LARGE64-NEXT: lfsx f1, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLInit: @@ -588,10 +546,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lfs f1, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f1, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -605,8 +562,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f1, 0(r3) +; LARGE32-NEXT: lfsx f1, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -622,9 +578,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lfsx f0, r13, r3 ; SMALL64-NEXT: lfs f1, 0(r4) -; SMALL64-NEXT: lfs f0, 0(r3) ; SMALL64-NEXT: fadds f1, f0, f1 ; SMALL64-NEXT: blr ; @@ -634,9 +589,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lfsx f0, r13, r3 ; LARGE64-NEXT: lfs f1, 0(r4) -; LARGE64-NEXT: lfs f0, 0(r3) ; LARGE64-NEXT: fadds f1, f0, f1 ; LARGE64-NEXT: blr ; @@ -646,11 +600,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) 
@ThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lfsx f0, r3, r4 ; SMALL32-NEXT: lfs f1, 0(r5) -; SMALL32-NEXT: lfs f0, 0(r3) ; SMALL32-NEXT: fadds f1, f0, f1 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -665,8 +618,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lfs f0, 0(r3) +; LARGE32-NEXT: lfsx f0, r3, r4 ; LARGE32-NEXT: addis r3, L..C4@u(r2) ; LARGE32-NEXT: lwz r3, L..C4@l(r3) ; LARGE32-NEXT: lfs f1, 0(r3) diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll @@ -23,16 +23,14 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C0@u(r2) ; LARGE64-NEXT: ld r4, L..C0@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLUninit: @@ -43,8 +41,7 @@ ; SMALL32-NEXT: mr r4, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r5 -; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: stwx r4, r3, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -59,8 +56,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r5, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r5 -; 
LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: stwx r4, r3, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -75,16 +71,14 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C1@u(r2) ; LARGE64-NEXT: ld r4, L..C1@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLInit: @@ -95,8 +89,7 @@ ; SMALL32-NEXT: mr r4, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r5 -; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: stwx r4, r3, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -111,8 +104,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r5, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r5 -; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: stwx r4, r3, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -127,16 +119,14 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C2@u(r2) ; LARGE64-NEXT: ld r4, L..C2@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLUninit: @@ -147,8 +137,7 @@ ; SMALL32-NEXT: mr r4, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw 
r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r5 -; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: stwx r4, r3, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -163,8 +152,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r5, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r5 -; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: stwx r4, r3, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -179,16 +167,14 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: stw r3, 0(r4) +; SMALL64-NEXT: stwx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C3@u(r2) ; LARGE64-NEXT: ld r4, L..C3@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: stw r3, 0(r4) +; LARGE64-NEXT: stwx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLInit: @@ -199,8 +185,7 @@ ; SMALL32-NEXT: mr r4, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r5 -; SMALL32-NEXT: stw r4, 0(r3) +; SMALL32-NEXT: stwx r4, r3, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -215,8 +200,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r5, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r5 -; LARGE32-NEXT: stw r4, 0(r3) +; LARGE32-NEXT: stwx r4, r3, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -231,16 +215,14 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: 
loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLUninit: @@ -248,10 +230,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -265,8 +246,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -282,9 +262,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -295,9 +274,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr @@ -308,11 +286,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: 
stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: lwz r4, 0(r5) -; SMALL32-NEXT: lwz r3, 0(r3) ; SMALL32-NEXT: add r3, r4, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -327,8 +304,7 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addis r4, L..C4@u(r2) ; LARGE32-NEXT: lwz r4, L..C4@l(r4) ; LARGE32-NEXT: lwz r4, 0(r4) @@ -349,16 +325,14 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLInit: @@ -366,10 +340,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -383,8 +356,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -400,9 +372,8 @@ ; SMALL64: # %bb.0: # %entry ; 
SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -413,9 +384,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr @@ -426,11 +396,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: lwz r4, 0(r5) -; SMALL32-NEXT: lwz r3, 0(r3) ; SMALL32-NEXT: add r3, r4, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -445,8 +414,7 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addis r4, L..C4@u(r2) ; LARGE32-NEXT: lwz r4, L..C4@l(r4) ; LARGE32-NEXT: lwz r4, 0(r4) @@ -467,16 +435,14 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; 
LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLUninit: @@ -484,10 +450,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -501,8 +466,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -518,9 +482,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -531,9 +494,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr @@ -544,11 +506,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: lwz r4, 0(r5) -; SMALL32-NEXT: lwz r3, 0(r3) ; 
SMALL32-NEXT: add r3, r4, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -563,8 +524,7 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addis r4, L..C4@u(r2) ; LARGE32-NEXT: lwz r4, L..C4@l(r4) ; LARGE32-NEXT: lwz r4, 0(r4) @@ -585,16 +545,14 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: lwa r3, 0(r3) +; SMALL64-NEXT: lwax r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: lwa r3, 0(r3) +; LARGE64-NEXT: lwax r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLInit: @@ -602,10 +560,9 @@ ; SMALL32-NEXT: mflr r0 ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r3) +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -619,8 +576,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -636,9 +592,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: lwzx r3, r13, r3 ; 
SMALL64-NEXT: lwz r4, 0(r4) -; SMALL64-NEXT: lwz r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: extsw r3, r3 ; SMALL64-NEXT: blr @@ -649,9 +604,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: lwzx r3, r13, r3 ; LARGE64-NEXT: lwz r4, 0(r4) -; LARGE64-NEXT: lwz r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: extsw r3, r3 ; LARGE64-NEXT: blr @@ -662,11 +616,10 @@ ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit -; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 +; SMALL32-NEXT: bla .__get_tpointer[PR] +; SMALL32-NEXT: lwzx r3, r3, r4 ; SMALL32-NEXT: lwz r4, 0(r5) -; SMALL32-NEXT: lwz r3, 0(r3) ; SMALL32-NEXT: add r3, r4, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -681,8 +634,7 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: lwzx r3, r3, r4 ; LARGE32-NEXT: addis r4, L..C4@u(r2) ; LARGE32-NEXT: lwz r4, L..C4@l(r4) ; LARGE32-NEXT: lwz r4, 0(r4) diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll @@ -23,16 +23,14 @@ ; SMALL64-LABEL: storeITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C0@u(r2) ; LARGE64-NEXT: ld r4, L..C0@l(r4) -; LARGE64-NEXT: 
add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLUninit: @@ -43,9 +41,9 @@ ; SMALL32-NEXT: mr r5, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r6 -; SMALL32-NEXT: stw r4, 4(r3) -; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: add r7, r3, r6 +; SMALL32-NEXT: stwx r5, r3, r6 +; SMALL32-NEXT: stw r4, 4(r7) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -60,9 +58,9 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r6, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r6 -; LARGE32-NEXT: stw r4, 4(r3) -; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: add r7, r3, r6 +; LARGE32-NEXT: stwx r5, r3, r6 +; LARGE32-NEXT: stw r4, 4(r7) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -77,16 +75,14 @@ ; SMALL64-LABEL: storeITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C1@u(r2) ; LARGE64-NEXT: ld r4, L..C1@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeITLInit: @@ -97,9 +93,9 @@ ; SMALL32-NEXT: mr r5, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r6 -; SMALL32-NEXT: stw r4, 4(r3) -; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: add r7, r3, r6 +; SMALL32-NEXT: stwx r5, r3, r6 +; SMALL32-NEXT: stw r4, 4(r7) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -114,9 +110,9 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r6, L..C1@l(r3) ; LARGE32-NEXT: 
bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r6 -; LARGE32-NEXT: stw r4, 4(r3) -; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: add r7, r3, r6 +; LARGE32-NEXT: stwx r5, r3, r6 +; LARGE32-NEXT: stw r4, 4(r7) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -131,16 +127,14 @@ ; SMALL64-LABEL: storeTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C2@u(r2) ; LARGE64-NEXT: ld r4, L..C2@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLUninit: @@ -151,9 +145,9 @@ ; SMALL32-NEXT: mr r5, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r6 -; SMALL32-NEXT: stw r4, 4(r3) -; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: add r7, r3, r6 +; SMALL32-NEXT: stwx r5, r3, r6 +; SMALL32-NEXT: stw r4, 4(r7) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -168,9 +162,9 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r6, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r6 -; LARGE32-NEXT: stw r4, 4(r3) -; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: add r7, r3, r6 +; LARGE32-NEXT: stwx r5, r3, r6 +; LARGE32-NEXT: stw r4, 4(r7) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -185,16 +179,14 @@ ; SMALL64-LABEL: storeTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r4, r13, r4 -; SMALL64-NEXT: std r3, 0(r4) +; SMALL64-NEXT: stdx r3, r13, r4 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: storeTLInit: ; LARGE64: # 
%bb.0: # %entry ; LARGE64-NEXT: addis r4, L..C3@u(r2) ; LARGE64-NEXT: ld r4, L..C3@l(r4) -; LARGE64-NEXT: add r4, r13, r4 -; LARGE64-NEXT: std r3, 0(r4) +; LARGE64-NEXT: stdx r3, r13, r4 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: storeTLInit: @@ -205,9 +197,9 @@ ; SMALL32-NEXT: mr r5, r3 ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r6 -; SMALL32-NEXT: stw r4, 4(r3) -; SMALL32-NEXT: stw r5, 0(r3) +; SMALL32-NEXT: add r7, r3, r6 +; SMALL32-NEXT: stwx r5, r3, r6 +; SMALL32-NEXT: stw r4, 4(r7) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -222,9 +214,9 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r6, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r6 -; LARGE32-NEXT: stw r4, 4(r3) -; LARGE32-NEXT: stw r5, 0(r3) +; LARGE32-NEXT: add r7, r3, r6 +; LARGE32-NEXT: stwx r5, r3, r6 +; LARGE32-NEXT: stw r4, 4(r7) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -239,16 +231,14 @@ ; SMALL64-LABEL: loadITLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C0@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLUninit: @@ -258,9 +248,9 @@ ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r4, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r4) -; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: add r5, r3, r4 +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r4, 4(r5) ; SMALL32-NEXT: addi r1, 
r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -274,9 +264,9 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r4, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r4) -; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -292,9 +282,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -304,9 +293,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C0@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr ; @@ -318,12 +306,12 @@ ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: add r6, r3, r4 +; SMALL32-NEXT: lwz r7, 4(r5) +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r6) ; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: addc r4, r7, r6 ; SMALL32-NEXT: adde r3, r5, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -338,9 +326,9 @@ ; LARGE32-NEXT: addis r3, L..C0@u(r2) ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, 
r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addis r5, L..C4@u(r2) ; LARGE32-NEXT: lwz r5, L..C4@l(r5) ; LARGE32-NEXT: lwz r6, 4(r5) @@ -363,16 +351,14 @@ ; SMALL64-LABEL: loadITLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadITLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C1@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadITLInit: @@ -382,9 +368,9 @@ ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r4, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r4) -; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: add r5, r3, r4 +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r4, 4(r5) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -398,9 +384,9 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r4, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r4) -; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -416,9 +402,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -428,9 +413,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C1@l(r3) 
; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr ; @@ -442,12 +426,12 @@ ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: add r6, r3, r4 +; SMALL32-NEXT: lwz r7, 4(r5) +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r6) ; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: addc r4, r7, r6 ; SMALL32-NEXT: adde r3, r5, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -462,9 +446,9 @@ ; LARGE32-NEXT: addis r3, L..C1@u(r2) ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addis r5, L..C4@u(r2) ; LARGE32-NEXT: lwz r5, L..C4@l(r5) ; LARGE32-NEXT: lwz r6, 4(r5) @@ -487,16 +471,14 @@ ; SMALL64-LABEL: loadTLUninit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLUninit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C2@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLUninit: @@ -506,9 +488,9 @@ ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r4, r3, r4 -; 
SMALL32-NEXT: lwz r3, 0(r4) -; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: add r5, r3, r4 +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r4, 4(r5) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -522,9 +504,9 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r4, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r4) -; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -540,9 +522,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -552,9 +533,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C2@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr ; @@ -566,12 +546,12 @@ ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: add r6, r3, r4 +; SMALL32-NEXT: lwz r7, 4(r5) +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r6) ; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: addc r4, r7, r6 ; SMALL32-NEXT: adde r3, r5, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -586,9 +566,9 @@ ; LARGE32-NEXT: addis r3, L..C2@u(r2) ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: 
bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addis r5, L..C4@u(r2) ; LARGE32-NEXT: lwz r5, L..C4@l(r5) ; LARGE32-NEXT: lwz r6, 4(r5) @@ -611,16 +591,14 @@ ; SMALL64-LABEL: loadTLInit: ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit -; SMALL64-NEXT: add r3, r13, r3 -; SMALL64-NEXT: ld r3, 0(r3) +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: blr ; ; LARGE64-LABEL: loadTLInit: ; LARGE64: # %bb.0: # %entry ; LARGE64-NEXT: addis r3, L..C3@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) -; LARGE64-NEXT: add r3, r13, r3 -; LARGE64-NEXT: ld r3, 0(r3) +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: blr ; ; SMALL32-LABEL: loadTLInit: @@ -630,9 +608,9 @@ ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r4, r3, r4 -; SMALL32-NEXT: lwz r3, 0(r4) -; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: add r5, r3, r4 +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r4, 4(r5) ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -646,9 +624,9 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r4, r3, r4 -; LARGE32-NEXT: lwz r3, 0(r4) -; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -664,9 +642,8 @@ ; SMALL64: # %bb.0: # %entry ; SMALL64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL64-NEXT: ld r4, L..C4(r2) # @VarInit -; SMALL64-NEXT: add r3, r13, r3 +; SMALL64-NEXT: ldx r3, r13, r3 ; SMALL64-NEXT: ld r4, 0(r4) -; 
SMALL64-NEXT: ld r3, 0(r3) ; SMALL64-NEXT: add r3, r4, r3 ; SMALL64-NEXT: blr ; @@ -676,9 +653,8 @@ ; LARGE64-NEXT: addis r4, L..C4@u(r2) ; LARGE64-NEXT: ld r3, L..C3@l(r3) ; LARGE64-NEXT: ld r4, L..C4@l(r4) -; LARGE64-NEXT: add r3, r13, r3 +; LARGE64-NEXT: ldx r3, r13, r3 ; LARGE64-NEXT: ld r4, 0(r4) -; LARGE64-NEXT: ld r3, 0(r3) ; LARGE64-NEXT: add r3, r4, r3 ; LARGE64-NEXT: blr ; @@ -690,12 +666,12 @@ ; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] ; SMALL32-NEXT: stw r0, 40(r1) -; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) +; SMALL32-NEXT: add r6, r3, r4 +; SMALL32-NEXT: lwz r7, 4(r5) +; SMALL32-NEXT: lwzx r3, r3, r4 +; SMALL32-NEXT: lwz r6, 4(r6) ; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 +; SMALL32-NEXT: addc r4, r7, r6 ; SMALL32-NEXT: adde r3, r5, r3 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) @@ -710,9 +686,9 @@ ; LARGE32-NEXT: addis r3, L..C3@u(r2) ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] -; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) +; LARGE32-NEXT: add r5, r3, r4 +; LARGE32-NEXT: lwzx r3, r3, r4 +; LARGE32-NEXT: lwz r4, 4(r5) ; LARGE32-NEXT: addis r5, L..C4@u(r2) ; LARGE32-NEXT: lwz r5, L..C4@l(r5) ; LARGE32-NEXT: lwz r6, 4(r5) diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll @@ -44,28 +44,28 @@ ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOCL (0x31) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x22 +; RELOC: Virtual Address: 0x12 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (17) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOCU (0x30) ; RELOC-NEXT: } -; 
RELOC: Virtual Address: 0x2A +; RELOC: Virtual Address: 0x1A ; RELOC-NEXT: Symbol: ThreadLocalVarInit (17) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOCL (0x31) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x80 +; RELOC: Virtual Address: 0x68 ; RELOC-NEXT: Symbol: IThreadLocalVarUninit (23) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 64 ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x88 +; RELOC: Virtual Address: 0x70 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (21) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -80,7 +80,7 @@ ; SYM-NEXT: Symbols [ ; SYM: Index: 15 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x80 +; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -98,7 +98,7 @@ ; SYM-NEXT: } ; SYM: Index: 17 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value (RelocatableAddress): 0x88 +; SYM-NEXT: Value (RelocatableAddress): 0x70 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -158,10 +158,9 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 15) IThreadLocalVarUninit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 15) IThreadLocalVarUninit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 4, 13, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 3, 0(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stdx 3, 13, 4 ; DIS-NEXT: blr -; DIS: 0000000000000020 (idx: 5) .loadTLInit: +; DIS: 0000000000000010 (idx: 5) .loadTLInit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 17) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 4, 2, 0 @@ -170,42 +169,41 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 16(4) ; 
DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) VarInit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ldx 3, 13, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 0(3) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 4, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr ; DIS: Disassembly of section .data: -; DIS: 0000000000000048 (idx: 7) VarInit[RW]: -; DIS-NEXT: 48: 00 00 00 00 -; DIS-NEXT: 4c: 00 00 00 57 -; DIS: 0000000000000050 (idx: 9) storeITLUninit[DS]: +; DIS: 0000000000000030 (idx: 7) VarInit[RW]: +; DIS-NEXT: 30: 00 00 00 00 +; DIS-NEXT: 34: 00 00 00 57 +; DIS: 0000000000000038 (idx: 9) storeITLUninit[DS]: +; DIS-NEXT: 38: 00 00 00 00 +; DIS-NEXT: 0000000000000038: R_POS (idx: 3) .storeITLUninit +; DIS-NEXT: 3c: 00 00 00 00 +; DIS-NEXT: 40: 00 00 00 00 +; DIS-NEXT: 0000000000000040: R_POS (idx: 13) TOC[TC0] +; DIS-NEXT: 44: 00 00 00 68 +; DIS: 0000000000000050 (idx: 11) loadTLInit[DS]: ; DIS-NEXT: 50: 00 00 00 00 -; DIS-NEXT: 0000000000000050: R_POS (idx: 3) .storeITLUninit -; DIS-NEXT: 54: 00 00 00 00 +; DIS-NEXT: 0000000000000050: R_POS (idx: 5) .loadTLInit +; DIS-NEXT: 54: 00 00 00 10 ; DIS-NEXT: 58: 00 00 00 00 ; DIS-NEXT: 0000000000000058: R_POS (idx: 13) TOC[TC0] -; DIS-NEXT: 5c: 00 00 00 80 -; DIS: 0000000000000068 (idx: 11) loadTLInit[DS]: +; DIS-NEXT: 5c: 00 00 00 68 +; DIS: 0000000000000068 (idx: 15) IThreadLocalVarUninit[TE]: ; DIS-NEXT: 68: 00 00 00 00 -; DIS-NEXT: 0000000000000068: R_POS (idx: 5) .loadTLInit -; DIS-NEXT: 6c: 00 00 00 20 +; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 23) IThreadLocalVarUninit[UL] +; DIS-NEXT: 6c: 00 00 00 00 +; DIS: 0000000000000070 (idx: 17) ThreadLocalVarInit[TE]: ; DIS-NEXT: 70: 00 00 00 00 -; DIS-NEXT: 0000000000000070: R_POS (idx: 13) TOC[TC0] -; DIS-NEXT: 74: 00 00 00 80 -; DIS: 0000000000000080 (idx: 15) IThreadLocalVarUninit[TE]: -; DIS-NEXT: 80: 00 00 00 00 -; DIS-NEXT: 0000000000000080: R_TLS_LE (idx: 23) IThreadLocalVarUninit[UL] -; 
DIS-NEXT: 84: 00 00 00 00 -; DIS: 0000000000000088 (idx: 17) ThreadLocalVarInit[TE]: -; DIS-NEXT: 88: 00 00 00 00 -; DIS-NEXT: 0000000000000088: R_TLS_LE (idx: 21) ThreadLocalVarInit[TL] -; DIS-NEXT: 8c: 00 00 00 00 -; DIS: 0000000000000090 (idx: 19) VarInit[TE]: -; DIS-NEXT: 90: 00 00 00 00 -; DIS-NEXT: 0000000000000090: R_POS (idx: 7) VarInit[RW] -; DIS-NEXT: 94: 00 00 00 48 +; DIS-NEXT: 0000000000000070: R_TLS_LE (idx: 21) ThreadLocalVarInit[TL] +; DIS-NEXT: 74: 00 00 00 00 +; DIS: 0000000000000078 (idx: 19) VarInit[TE]: +; DIS-NEXT: 78: 00 00 00 00 +; DIS-NEXT: 0000000000000078: R_POS (idx: 7) VarInit[RW] +; DIS-NEXT: 7c: 00 00 00 30 ; DIS: Disassembly of section .tdata: ; DIS: 0000000000000000 (idx: 21) ThreadLocalVarInit[TL]: diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll @@ -202,9 +202,9 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) IThreadLocalVarUninit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 6 -; DIS-NEXT: stw 4, 4(3) -; DIS-NEXT: stw 5, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 7, 3, 6 +; DIS-NEXT: stwx 5, 3, 6 +; DIS-NEXT: stw 4, 4(7) ; DIS-NEXT: addi 1, 1, 32 ; DIS-NEXT: lwz 0, 8(1) ; DIS-NEXT: mtlr 0 @@ -219,9 +219,9 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) ThreadLocalVarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(3) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 5, 3, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwzx 3, 3, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(5) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} 
addis 5, 2, 0 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) VarInit[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 8(5) diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll @@ -44,14 +44,14 @@ ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOC (0x3) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x68 +; RELOC: Virtual Address: 0x60 ; RELOC-NEXT: Symbol: IThreadLocalVarUninit (27) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 64 ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x70 +; RELOC: Virtual Address: 0x68 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (25) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -66,7 +66,7 @@ ; SYM-NEXT: Symbols [ ; SYM: Index: 17 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x68 +; SYM-NEXT: Value (RelocatableAddress): 0x60 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -84,7 +84,7 @@ ; SYM-NEXT: } ; SYM: Index: 19 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value (RelocatableAddress): 0x70 +; SYM-NEXT: Value (RelocatableAddress): 0x68 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -142,47 +142,45 @@ ; DIS: 0000000000000000 (idx: 3) .storeITLUninit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 17) IThreadLocalVarUninit[TC] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 4, 13, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 0(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stwx 3, 13, 4 ; DIS-NEXT: blr ; DIS: 0000000000000010 (idx: 5) .loadTLInit: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 8(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 19) ThreadLocalVarInit[TC] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 16(2) ; DIS-NEXT: 
{{0*}}[[#ADDR + 2]]: R_TOC (idx: 21) VarInit[TC] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwzx 3, 13, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 0(4) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 4, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} extsw 3, 3 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr ; DIS: Disassembly of section .data: -; DIS: 0000000000000030 (idx: 9) VarInit: -; DIS-NEXT: 30: 00 00 00 57 -; DIS: 0000000000000038 (idx: 11) storeITLUninit[DS]: -; DIS-NEXT: 8: 00 00 00 00 -; DIS-NEXT: 0000000000000038: R_POS (idx: 3) .storeITLUninit -; DIS-NEXT: 3c: 00 00 00 00 -; DIS-NEXT: 40: 00 00 00 00 -; DIS-NEXT: 0000000000000040: R_POS (idx: 15) TOC[TC0] -; DIS-NEXT: 44: 00 00 00 68 -; DIS: 0000000000000050 (idx: 13) loadTLInit[DS]: +; DIS: 000000000000002c (idx: 9) VarInit: +; DIS-NEXT: 2c: 00 00 00 57 +; DIS: 0000000000000030 (idx: 11) storeITLUninit[DS]: +; DIS-NEXT: 30: 00 00 00 00 +; DIS-NEXT: 0000000000000030: R_POS (idx: 3) .storeITLUninit +; DIS-NEXT: 34: 00 00 00 00 +; DIS-NEXT: 38: 00 00 00 00 +; DIS-NEXT: 0000000000000038: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: 3c: 00 00 00 60 +; DIS: 0000000000000048 (idx: 13) loadTLInit[DS]: +; DIS-NEXT: 48: 00 00 00 00 +; DIS-NEXT: 0000000000000048: R_POS (idx: 5) .loadTLInit +; DIS-NEXT: 4c: 00 00 00 10 ; DIS-NEXT: 50: 00 00 00 00 -; DIS-NEXT: 0000000000000050: R_POS (idx: 5) .loadTLInit -; DIS-NEXT: 54: 00 00 00 10 -; DIS-NEXT: 58: 00 00 00 00 -; DIS-NEXT: 0000000000000058: R_POS (idx: 15) TOC[TC0] -; DIS-NEXT: 5c: 00 00 00 68 -; DIS: 0000000000000068 (idx: 17) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 0000000000000050: R_POS (idx: 15) TOC[TC0] +; DIS-NEXT: 54: 00 00 00 60 +; DIS: 0000000000000060 (idx: 17) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 60: 00 00 00 00 +; DIS-NEXT: 0000000000000060: R_TLS_LE (idx: 27) IThreadLocalVarUninit[UL] +; DIS: 0000000000000068 (idx: 19) ThreadLocalVarInit[TC]: ; DIS-NEXT: 68: 00 00 00 00 -; DIS-NEXT: 
0000000000000068: R_TLS_LE (idx: 27) IThreadLocalVarUninit[UL] -; DIS: 0000000000000070 (idx: 19) ThreadLocalVarInit[TC]: +; DIS-NEXT: 0000000000000068: R_TLS_LE (idx: 25) ThreadLocalVarInit +; DIS: 0000000000000070 (idx: 21) VarInit[TC]: ; DIS-NEXT: 70: 00 00 00 00 -; DIS-NEXT: 0000000000000070: R_TLS_LE (idx: 25) ThreadLocalVarInit -; DIS: 0000000000000078 (idx: 21) VarInit[TC]: -; DIS-NEXT: 78: 00 00 00 00 -; DIS-NEXT: 0000000000000078: R_POS (idx: 9) VarInit +; DIS-NEXT: 0000000000000070: R_POS (idx: 9) VarInit ; DIS: Disassembly of section .tdata: ; DIS: 0000000000000000 (idx: 25) ThreadLocalVarInit: diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll @@ -51,21 +51,21 @@ ; RELOC-NEXT: Length: 16 ; RELOC-NEXT: Type: R_TOC (0x3) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x40 +; RELOC: Virtual Address: 0x44 ; RELOC-NEXT: Symbol: .__get_tpointer (1) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 26 ; RELOC-NEXT: Type: R_RBA (0x18) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x84 +; RELOC: Virtual Address: 0x80 ; RELOC-NEXT: Symbol: IThreadLocalVarUninit (29) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 32 ; RELOC-NEXT: Type: R_TLS_LE (0x23) ; RELOC-NEXT: } -; RELOC: Virtual Address: 0x88 +; RELOC: Virtual Address: 0x84 ; RELOC-NEXT: Symbol: ThreadLocalVarInit (27) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 @@ -99,7 +99,7 @@ ; SYM-NEXT: } ; SYM: Index: 19 ; SYM-NEXT: Name: IThreadLocalVarUninit -; SYM-NEXT: Value (RelocatableAddress): 0x84 +; SYM-NEXT: Value (RelocatableAddress): 0x80 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -118,7 +118,7 @@ ; SYM-NEXT: } ; SYM: Index: 21 ; SYM-NEXT: Name: ThreadLocalVarInit -; SYM-NEXT: Value 
(RelocatableAddress): 0x88 +; SYM-NEXT: Value (RelocatableAddress): 0x84 ; SYM-NEXT: Section: .data ; SYM-NEXT: Type: 0x0 ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -185,8 +185,7 @@ ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] ; DIS-NEXT: stw 0, 40(1) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 5 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stwx 4, 3, 5 ; DIS-NEXT: addi 1, 1, 32 ; DIS-NEXT: lwz 0, 8(1) ; DIS-NEXT: mtlr 0 @@ -198,12 +197,11 @@ ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 21) ThreadLocalVarInit[TC] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 8(2) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 23) VarInit[TC] +; DIS-NEXT: stw 0, 40(1) ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR] -; DIS-NEXT: stw 0, 40(1) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwzx 3, 3, 4 ; DIS-NEXT: lwz 4, 0(5) -; DIS-NEXT: lwz 3, 0(3) ; DIS-NEXT: add 3, 4, 3 ; DIS-NEXT: addi 1, 1, 32 ; DIS-NEXT: lwz 0, 8(1) @@ -211,29 +209,29 @@ ; DIS-NEXT: blr ; DIS: Disassembly of section .data: -; DIS: 00000068 (idx: 11) VarInit: -; DIS-NEXT: 68: 00 00 00 57 -; DIS: 0000006c (idx: 13) storeITLUninit[DS]: -; DIS-NEXT: 6c: 00 00 00 00 -; DIS-NEXT: 0000006c: R_POS (idx: 5) .storeITLUninit -; DIS-NEXT: 70: 00 00 00 84 -; DIS-NEXT: 00000070: R_POS (idx: 17) TOC[TC0] -; DIS-NEXT: 74: 00 00 00 00 -; DIS: 00000078 (idx: 15) loadTLInit[DS]: -; DIS-NEXT: 78: 00 00 00 30 -; DIS-NEXT: 00000078: R_POS (idx: 7) .loadTLInit -; DIS-NEXT: 7c: 00 00 00 84 -; DIS-NEXT: 0000007c: R_POS (idx: 17) TOC[TC0] +; DIS: 00000064 (idx: 11) VarInit: +; DIS-NEXT: 64: 00 00 00 57 +; DIS: 00000068 (idx: 13) storeITLUninit[DS]: +; DIS-NEXT: 68: 00 00 00 00 +; DIS-NEXT: 00000068: R_POS (idx: 5) .storeITLUninit +; DIS-NEXT: 6c: 00 00 00 80 +; DIS-NEXT: 0000006c: R_POS (idx: 17) TOC[TC0] +; DIS-NEXT: 70: 00 00 00 00 +; DIS: 00000074 
(idx: 15) loadTLInit[DS]: +; DIS-NEXT: 74: 00 00 00 30 +; DIS-NEXT: 00000074: R_POS (idx: 7) .loadTLInit +; DIS-NEXT: 78: 00 00 00 80 +; DIS-NEXT: 00000078: R_POS (idx: 17) TOC[TC0] +; DIS-NEXT: 7c: 00 00 00 00 +; DIS: 00000080 (idx: 19) IThreadLocalVarUninit[TC]: ; DIS-NEXT: 80: 00 00 00 00 -; DIS: 00000084 (idx: 19) IThreadLocalVarUninit[TC]: +; DIS-NEXT: 00000080: R_TLS_LE (idx: 29) IThreadLocalVarUninit[UL] +; DIS: 00000084 (idx: 21) ThreadLocalVarInit[TC]: ; DIS-NEXT: 84: 00 00 00 00 -; DIS-NEXT: 00000084: R_TLS_LE (idx: 29) IThreadLocalVarUninit[UL] -; DIS: 00000088 (idx: 21) ThreadLocalVarInit[TC]: -; DIS-NEXT: 88: 00 00 00 00 -; DIS-NEXT: 00000088: R_TLS_LE (idx: 27) ThreadLocalVarInit -; DIS: 0000008c (idx: 23) VarInit[TC]: -; DIS-NEXT: 8c: 00 00 00 68 -; DIS-NEXT: 0000008c: R_POS (idx: 11) VarInit +; DIS-NEXT: 00000084: R_TLS_LE (idx: 27) ThreadLocalVarInit +; DIS: 00000088 (idx: 23) VarInit[TC]: +; DIS-NEXT: 88: 00 00 00 64 +; DIS-NEXT: 00000088: R_POS (idx: 11) VarInit ; DIS: Disassembly of section .tdata: ; DIS: 00000000 (idx: 27) ThreadLocalVarInit: