diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -817,6 +817,12 @@
     return false;
   }
 
+  /// Return true if the target wants the DAG combiner to source a scalar
+  /// constant store from part of a constant vector that is stored as well
+  /// (extract and store), instead of materializing the scalar separately.
+  /// The default is false.
+  virtual bool shouldReuseConstVectorForExtractStore() const { return false; }
+
   /// Return true if inserting a scalar into a variable element of an undef
   /// vector is more efficiently handled by splatting the scalar instead.
   virtual bool shouldSplatInsEltVarIndex(EVT) const {
@@ -843,6 +849,15 @@
 
   virtual MVT::SimpleValueType getCmpLibcallReturnType() const;
 
+  /// Return the index of the vector element, counted in lanes of \p BitWidth
+  /// bits, that the target can extract and store efficiently
+  /// (store(extractelement)).
+  /// \returns -1U if the target has no such lane for \p BitWidth; this is
+  /// what the default implementation always returns.
+  virtual unsigned getCombineStoreAndExtractIdx(unsigned BitWidth) const {
+    return -1U;
+  }
+
   /// For targets without i1 registers, this gives the nature of the high-bits
   /// of boolean values held in types wider than i1.
   ///
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -744,10 +744,13 @@
 
   /// This is a helper function for mergeConsecutiveStores. Stores that
   /// potentially may be merged with St are placed in StoreNodes. RootNode is
-  /// a chain predecessor to all store candidates.
+  /// a chain predecessor to all store candidates. If a candidate stores a
+  /// constant vector with a lane bitwise equal to St's constant source, the
+  /// vector, extract type and lane go in VectorVal, VectorVT and ExtractIdx.
   void getStoreMergeCandidates(StoreSDNode *St,
                                SmallVectorImpl<MemOpLink> &StoreNodes,
-                               SDNode *&Root);
+                               SDNode *&Root, SDValue &VectorVal,
+                               EVT &VectorVT, unsigned &ExtractIdx);
 
   /// Helper function for mergeConsecutiveStores. Checks if candidate stores
   /// have indirect dependency through their operands.
RootNode is the
@@ -784,9 +787,13 @@
                             SDNode *Root, bool AllowVectors,
                             bool IsNonTemporalStore, bool IsNonTemporalLoad);
 
-  /// Merge consecutive store operations into a wide store.
-  /// This optimization uses wide integers or vectors when possible.
-  /// \return true if stores were merged.
+  /// Merge consecutive store operations into a wide store. This optimization
+  /// uses wide integers or vectors when possible. If no such merge is
+  /// possible and St stores an integer or floating-point constant that is
+  /// bitwise equal to part of a constant vector stored under the same root
+  /// node, an extract-and-store pattern is generated instead, provided the
+  /// backend supports that combine.
+  /// \return true if stores were merged or the pattern was generated.
   bool mergeConsecutiveStores(StoreSDNode *St);
 
   /// Try to transform a truncation where C is a constant:
@@ -19320,8 +19327,8 @@
 }
 
 void DAGCombiner::getStoreMergeCandidates(
-    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
-    SDNode *&RootNode) {
+    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, SDNode *&RootNode,
+    SDValue &VectorVal, EVT &VectorVT, unsigned &ExtractIdx) {
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer. We must have a base and an offset. Do not handle stores to undef
   // base pointers.
@@ -19352,6 +19359,128 @@
     if (!Ld->isSimple() || Ld->isIndexed())
       return;
   }
+  // Check whether OtherBC is a constant BUILD_VECTOR containing a lane that is
+  // bitwise equal to the constant stored by St, and whether the backend
+  // supports the extract-and-store pattern for that lane. On success the
+  // vector, the type to extract from and the lane index are recorded in
+  // VectorVal, VectorVT and ExtractIdx; otherwise they are left untouched.
+  auto ConstVectorCandidateMatch = [&](SDValue &OtherBC) -> void {
+    // Only the first matching vector is recorded.
+    if (VectorVal != SDValue())
+      return;
+    if (OtherBC.getOpcode() != ISD::BUILD_VECTOR)
+      return;
+    if (!isTypeLegal(OtherBC.getValueType()))
+      return;
+    // Truncated stores are likely simple constants. Stores that return values
+    // are out-of-scope. Do not bother reuse.
+    if (St->isTruncatingStore() || St->getNumValues() != 1)
+      return;
+    // May improve only if the stored value has no use other than St and
+    // OtherBC.
+    for (auto I = Val.getNode()->use_begin(), E = Val.getNode()->use_end();
+         I != E; ++I) {
+      if (*I != St && *I != OtherBC.getNode())
+        return;
+    }
+    unsigned StoreSizeInBits = MemVT.getSizeInBits();
+    // Only for full word power of two types.
+    if (StoreSizeInBits < 32 || !llvm::isPowerOf2_64(StoreSizeInBits))
+      return;
+    // Get the constant source value for St.
+    APInt ConstVal;
+    if (auto *Const = dyn_cast<ConstantSDNode>(Val)) {
+      ConstVal = Const->getAPIntValue();
+    } else if (auto *Const = dyn_cast<ConstantFPSDNode>(Val)) {
+      ConstVal = Const->getValueAPF().bitcastToAPInt();
+    } else
+      return;
+    // Do not combine if the active bits fit in a halfword or the value is
+    // all ones.
+    if (ConstVal.isIntN(16) || ConstVal.isAllOnes())
+      return;
+
+    // Create vector type for extract.
+    unsigned VecSizeInBits = OtherBC.getValueType().getSizeInBits();
+    if (VecSizeInBits % StoreSizeInBits != 0)
+      return;
+    unsigned NElts = VecSizeInBits / StoreSizeInBits;
+    EVT SVT = EVT::getVectorVT(*DAG.getContext(), Val.getValueType(), NElts);
+    if (!isTypeLegal(SVT))
+      return;
+
+    // Ask the target which lane it can extract and store. The index stays
+    // local until every check has passed, so a rejected candidate does not
+    // clobber the caller's ExtractIdx.
+    unsigned Idx = TLI.getCombineStoreAndExtractIdx(StoreSizeInBits);
+    if (Idx == -1U || Idx >= NElts)
+      return;
+
+    // Collect the bits of lane Idx from the BUILD_VECTOR operands. Bit ranges
+    // are counted from the first element, so the lane is [LaneLow, LaneHigh).
+    SmallVector<APInt> Fragments;
+    const bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+    unsigned VecElemSizeInBits =
+        OtherBC.getValueType().getScalarType().getSizeInBits();
+    unsigned LaneLow = Idx * StoreSizeInBits;
+    unsigned LaneHigh = LaneLow + StoreSizeInBits;
+    unsigned BitLow = 0;
+    for (const SDValue &Op : OtherBC->op_values()) {
+      unsigned BitHigh = BitLow + VecElemSizeInBits;
+      // Bail out if any lane is not defined.
+      if (Op.isUndef())
+        return;
+      // Element should be constant integer or floating-point.
+      APInt ElemVal;
+      if (auto *Const = dyn_cast<ConstantSDNode>(Op)) {
+        ElemVal = Const->getAPIntValue();
+      } else if (auto *Const = dyn_cast<ConstantFPSDNode>(Op)) {
+        ElemVal = Const->getValueAPF().bitcastToAPInt();
+      } else
+        return;
+      // Push the part of this element that intersects [LaneLow, LaneHigh).
+      if (BitHigh > LaneLow && LaneHigh > BitLow) {
+        unsigned Low = std::max(BitLow, LaneLow);
+        unsigned NumBits = std::min(BitHigh, LaneHigh) - Low;
+        // Offset is where the fragment starts within the element, counted
+        // from the element's lowest address. On little-endian that is also
+        // its bit position in the value; on big-endian the lowest address
+        // holds the most significant bits, so the position is mirrored.
+        unsigned Offset = Low - BitLow;
+        unsigned BitPos =
+            IsBigEndian ? VecElemSizeInBits - Offset - NumBits : Offset;
+        Fragments.push_back(ElemVal.extractBits(NumBits, BitPos));
+      }
+      // Update element position for the next one.
+      BitLow = BitHigh;
+    }
+
+    // Make sure we got fragments, and swap for BigEndian.
+    if (Fragments.empty())
+      return;
+    if (IsBigEndian)
+      std::reverse(Fragments.begin(), Fragments.end());
+
+    // Merge all fragments and check identical.
+    APInt Concat = Fragments.back();
+    for (const APInt &Frag :
+         make_range(Fragments.rbegin() + 1, Fragments.rend()))
+      Concat = Concat.concat(Frag);
+    assert(Concat.getBitWidth() == ConstVal.getBitWidth());
+    if (Concat != ConstVal)
+      return;
+
+    // See if backend supports the combine, and check for least cost.
+    unsigned TmpC;
+    if (!TLI.canCombineStoreAndExtract(
+            SVT.getTypeForEVT(*DAG.getContext()),
+            ConstantInt::get(*DAG.getContext(), APInt(16, Idx)), TmpC))
+      return;
+    VectorVal = OtherBC;
+    VectorVT = SVT;
+    ExtractIdx = Idx;
+  };
+
   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                             int64_t &Offset) -> bool {
     // The memory operands must not be volatile/indexed/atomic.
@@ -19391,6 +19520,10 @@
       break;
     }
     case StoreSource::Constant:
+      // Optionally check if there is opportunity to reuse constant.
+      if (TLI.shouldReuseConstVectorForExtractStore() && Other != St) {
+        ConstVectorCandidateMatch(OtherBC);
+      }
       if (NoTypeMatch)
         return false;
       if (!isIntOrFPConstant(OtherBC))
@@ -20101,12 +20234,41 @@
 
   SmallVector<MemOpLink, 8> StoreNodes;
   SDNode *RootNode;
+  SDValue VectorVal;
+  EVT VectorVT;
+  unsigned ExtractIdx = -1U;
   // Find potential store merge candidates by searching through chain sub-DAG
-  getStoreMergeCandidates(St, StoreNodes, RootNode);
+  getStoreMergeCandidates(St, StoreNodes, RootNode, VectorVal, VectorVT,
+                          ExtractIdx);
 
   // Check if there is anything to merge.
-  if (StoreNodes.size() < 2)
-    return false;
+  if (StoreNodes.size() < 2) {
+    // Explore constant reuse only when nothing to merge.
+    if (!TLI.shouldReuseConstVectorForExtractStore() || VectorVal == SDValue())
+      return false;
+    // Constant Vector for extract-and-store has been identified. Generate
+    // pattern for isel.
+    SDLoc DL(St);
+    SDValue ValToExtract = DAG.getNode(ISD::BITCAST, DL, VectorVT, VectorVal);
+    if (ValToExtract.isUndef())
+      return false;
+    SDValue Value =
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoredVal.getValueType(),
+                    ValToExtract, DAG.getVectorIdxConstant(ExtractIdx, DL));
+    if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return false;
+    SDValue ValToStore =
+        DAG.getNode(ISD::BITCAST, DL, St->getValue().getValueType(), Value);
+    if (ValToStore.isUndef())
+      return false;
+    SDValue NewStore = DAG.getStore(St->getChain(), DL, ValToStore,
+                                    St->getBasePtr(), St->getMemOperand());
+    if (NewStore.getOpcode() != ISD::STORE)
+      return false;
+    CombineTo(St, NewStore);
+    AddToWorklist(NewStore.getNode());
+    return true;
+  }
 
   // Sort the memory operands according to their distance from the
   // base pointer.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6198,6 +6198,39 @@
   ConstantSDNode *N2CV =
       isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
 
+  // Return true if the constant BUILD_VECTOR Vec should be kept intact so that
+  // its element at index IdxC can feed an extract-and-store pattern: the
+  // target opted in, it supports the combine for that element, and Vec itself
+  // has a store use. Explores the opportunity that part of a constant vector
+  // to be stored can be reused as source for another store during combine.
+  auto CanReuseConstVectorElemForStore = [&](SDValue Vec,
+                                             ConstantSDNode *IdxC) {
+    if (!TLI->shouldReuseConstVectorForExtractStore())
+      return false;
+    if (Vec.getOpcode() != ISD::BUILD_VECTOR)
+      return false;
+    // The source vector need be constant.
+    if (!ISD::isBuildVectorOfConstantSDNodes(Vec.getNode()) &&
+        !ISD::isBuildVectorOfConstantFPSDNodes(Vec.getNode()))
+      return false;
+    // Check if there is pattern for extract and store. Build the index with
+    // the full 64 bits of the constant so that a large index is not silently
+    // truncated into a valid-looking one.
+    unsigned TmpC;
+    if (!TLI->canCombineStoreAndExtract(
+            Vec.getValueType().getTypeForEVT(*getContext()),
+            ConstantInt::get(*getContext(), APInt(64, IdxC->getZExtValue())),
+            TmpC))
+      return false;
+    // Constant vector need have a store use to make sure it will not be
+    // combined away.
+    for (auto UI = Vec->use_begin(), UE = Vec->use_end(); UI != UE; ++UI) {
+      if (UI->getOpcode() == ISD::STORE)
+        return true;
+    }
+    return false;
+  };
+
   switch (Opcode) {
   default: break;
   case ISD::TokenFactor:
@@ -6467,8 +6500,10 @@
 
     // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while
     // lowering is expanding large vector constants.
-    if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR ||
-                N1.getOpcode() == ISD::SPLAT_VECTOR)) {
+    if (N2C &&
+        (N1.getOpcode() == ISD::BUILD_VECTOR ||
+         N1.getOpcode() == ISD::SPLAT_VECTOR) &&
+        !CanReuseConstVectorElemForStore(N1, N2C)) {
       assert((N1.getOpcode() != ISD::BUILD_VECTOR ||
               N1.getValueType().isFixedLengthVector()) &&
              "BUILD_VECTOR used for scalable vectors");
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -793,6 +793,10 @@
     bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                    unsigned &Cost) const override;
 
+    unsigned getCombineStoreAndExtractIdx(unsigned BitWidth) const override;
+
+    bool shouldReuseConstVectorForExtractStore() const override { return true; }
+
     bool isCtlzFast() const override {
       return true;
     }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1650,6 +1650,27 @@
   return false;
 }
 
+/// Lane index used for extract-and-store of a \p BitWidth-bit element.
+/// 32-bit elements (P8 vector only) use lane 2 on little-endian and lane 1 on
+/// big-endian; 64-bit elements use lane 1 on little-endian and lane 0 on
+/// big-endian. Returns -1U for any other width, and whenever the subtarget is
+/// not PPC64 or has no VSX.
+unsigned
+PPCTargetLowering::getCombineStoreAndExtractIdx(unsigned BitWidth) const {
+  const unsigned NoIdx = -1U;
+  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
+    return NoIdx;
+  const bool IsLE = Subtarget.isLittleEndian();
+  switch (BitWidth) {
+  case 32:
+    return Subtarget.hasP8Vector() ? (IsLE ? 2u : 1u) : NoIdx;
+  case 64:
+    return IsLE ? 1u : 0u;
+  default:
+    return NoIdx;
+  }
+}
+
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER: break;
diff --git a/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll b/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
--- a/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
+++ b/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
@@ -16,22 +16,20 @@
 ; P8-BE-LABEL: foo1_int_be_reuse4B:
 ; P8-BE: # %bb.0: # %entry
 ; P8-BE-NEXT: ld 4, L..C0(2) # %const.0
+; P8-BE-NEXT: li 5, 2057
+; P8-BE-NEXT: sth 5, 20(3)
 ; P8-BE-NEXT: lxvw4x 0, 0, 4
-; P8-BE-NEXT: lis 4, 1029
-; P8-BE-NEXT: ori 4, 4, 1543
-; P8-BE-NEXT: stw 4, 16(3)
-; P8-BE-NEXT: li 4, 2057
+; P8-BE-NEXT: li 4, 16
+; P8-BE-NEXT: stfiwx 0, 3, 4
 ; P8-BE-NEXT: stxvw4x 0, 0, 3
-; P8-BE-NEXT: sth 4, 20(3)
 ; P8-BE-NEXT: blr
 ;
 ; P9-BE-LABEL: foo1_int_be_reuse4B:
 ; P9-BE: # %bb.0: # %entry
 ; P9-BE-NEXT: ld 4, L..C0(2) # %const.0
 ; P9-BE-NEXT: lxv 0, 0(4)
-; P9-BE-NEXT: lis 4, 1029
-; P9-BE-NEXT: ori 4, 4, 1543
-; P9-BE-NEXT: stw 4, 16(3)
+; P9-BE-NEXT: li 4, 16
+; P9-BE-NEXT: stfiwx 0, 3, 4
 ; P9-BE-NEXT: li 4, 2057
 ; P9-BE-NEXT: stxv 0, 0(3)
 ; P9-BE-NEXT: sth 4, 20(3)
@@ -41,11 +39,11 @@
 ; P10-BE: # %bb.0: # %entry
 ; P10-BE-NEXT: ld 4, L..C0(2) # %const.0
 ; P10-BE-NEXT: lxv 0, 0(4)
-; P10-BE-NEXT: pli 4, 67438087
-; P10-BE-NEXT: stw 4, 16(3)
+; P10-BE-NEXT: li 4, 16
+; P10-BE-NEXT: stfiwx 0, 3, 4
 ; P10-BE-NEXT: li 4, 2057
-; P10-BE-NEXT: sth 4, 20(3)
 ; P10-BE-NEXT: stxv 0, 0(3)
+; P10-BE-NEXT: sth 4, 20(3)
 ; P10-BE-NEXT: blr
 ;
 ; P8-LE-LABEL: foo1_int_be_reuse4B:
@@ -142,11 +140,11 @@
 ; P8-LE-NEXT: li 5, 3340
 ; P8-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l
 ; P8-LE-NEXT: lxvd2x 0, 0, 4
-; P8-LE-NEXT: lis 4, 2826
-; P8-LE-NEXT: ori 4, 4, 2312
+; P8-LE-NEXT: li 4, 16
+; P8-LE-NEXT: xxswapd 1, 0
 ; P8-LE-NEXT: stxvd2x 0, 0, 3
-; P8-LE-NEXT: stw 4, 16(3)
 ; P8-LE-NEXT: sth 5, 20(3)
+; P8-LE-NEXT: stfiwx 1, 3, 4
 ; P8-LE-NEXT: blr
 ;
 ; P9-LE-LABEL:
foo2_int_le_reuse4B: @@ -154,9 +152,8 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 2826 -; P9-LE-NEXT: ori 4, 4, 2312 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: li 4, 3340 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: sth 4, 20(3) @@ -165,11 +162,11 @@ ; P10-LE-LABEL: foo2_int_le_reuse4B: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: plxv 0, .LCPI1_0@PCREL(0), 1 -; P10-LE-NEXT: pli 4, 185207048 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: li 4, 3340 -; P10-LE-NEXT: sth 4, 20(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: sth 4, 20(3) ; P10-LE-NEXT: blr entry: store <16 x i8> , ptr %a, align 1 @@ -192,22 +189,20 @@ ; P8-BE-LABEL: foo3_int_be_reuse4B: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C2(2) # %const.0 +; P8-BE-NEXT: li 5, 2057 +; P8-BE-NEXT: sth 5, 20(3) ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 1029 -; P8-BE-NEXT: ori 4, 4, 1543 -; P8-BE-NEXT: stw 4, 16(3) -; P8-BE-NEXT: li 4, 2057 +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: sth 4, 20(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo3_int_be_reuse4B: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C2(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 1029 -; P9-BE-NEXT: ori 4, 4, 1543 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: li 4, 2057 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: sth 4, 20(3) @@ -217,11 +212,11 @@ ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: ld 4, L..C2(2) # %const.0 ; P10-BE-NEXT: lxv 0, 0(4) -; P10-BE-NEXT: pli 4, 67438087 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: li 4, 2057 -; P10-BE-NEXT: sth 4, 20(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: sth 4, 20(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo3_int_be_reuse4B: @@ 
-312,11 +307,11 @@ ; P8-LE-NEXT: li 5, 3085 ; P8-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 2571 -; P8-LE-NEXT: ori 4, 4, 2057 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 1, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) ; P8-LE-NEXT: sth 5, 20(3) +; P8-LE-NEXT: stfiwx 1, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo4_int_le_reuse4B: @@ -324,9 +319,8 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 2571 -; P9-LE-NEXT: ori 4, 4, 2057 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: li 4, 3085 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: sth 4, 20(3) @@ -335,11 +329,11 @@ ; P10-LE-LABEL: foo4_int_le_reuse4B: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: plxv 0, .LCPI3_0@PCREL(0), 1 -; P10-LE-NEXT: pli 4, 168495113 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: li 4, 3085 -; P10-LE-NEXT: sth 4, 20(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: sth 4, 20(3) ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -357,29 +351,27 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 1029 -; P8-BE-NEXT: ori 4, 4, 1543 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo5_int_be_reuse4B: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C4(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 1029 -; P9-BE-NEXT: ori 4, 4, 1543 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo5_int_be_reuse4B: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: ld 4, L..C4(2) # %const.0 ; P10-BE-NEXT: lxv 0, 0(4) -; P10-BE-NEXT: pli 4, 67438087 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 
16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo5_int_be_reuse4B: @@ -453,10 +445,10 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 2057 -; P8-LE-NEXT: ori 4, 4, 2571 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 1, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 1, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo6_int_le_reuse4B: @@ -464,18 +456,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 2057 -; P9-LE-NEXT: ori 4, 4, 2571 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo6_int_le_reuse4B: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: plxv 0, .LCPI5_0@PCREL(0), 1 -; P10-LE-NEXT: pli 4, 134810123 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -488,38 +479,25 @@ ; P8-BE-LABEL: foo7_int_be_reuse8B: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 -; P8-BE-NEXT: lis 5, 1 -; P8-BE-NEXT: ori 5, 5, 515 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: rldic 4, 5, 32, 15 -; P8-BE-NEXT: oris 4, 4, 1029 -; P8-BE-NEXT: ori 4, 4, 1543 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo7_int_be_reuse8B: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 1 -; P9-BE-NEXT: ori 4, 4, 515 -; P9-BE-NEXT: rldic 4, 4, 32, 15 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: oris 4, 4, 1029 -; P9-BE-NEXT: ori 4, 4, 1543 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo7_int_be_reuse8B: ; P10-BE: # %bb.0: # %entry ; 
P10-BE-NEXT: ld 4, L..C6(2) # %const.0 -; P10-BE-NEXT: pli 5, 67438087 ; P10-BE-NEXT: lxv 0, 0(4) -; P10-BE-NEXT: pli 4, 66051 -; P10-BE-NEXT: rldimi 5, 4, 32, 0 -; P10-BE-NEXT: std 5, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo7_int_be_reuse8B: @@ -621,15 +599,11 @@ ; P8-LE-LABEL: foo8_int_le_reuse8B: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha -; P8-LE-NEXT: lis 5, 963 ; P8-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l -; P8-LE-NEXT: ori 5, 5, 33603 ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: rldic 4, 5, 34, 4 -; P8-LE-NEXT: oris 4, 4, 2826 -; P8-LE-NEXT: ori 4, 4, 2312 +; P8-LE-NEXT: xxswapd 1, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 1, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo8_int_le_reuse8B: @@ -637,23 +611,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 963 -; P9-LE-NEXT: ori 4, 4, 33603 -; P9-LE-NEXT: rldic 4, 4, 34, 4 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: oris 4, 4, 2826 -; P9-LE-NEXT: ori 4, 4, 2312 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo8_int_le_reuse8B: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: plxv 0, .LCPI7_0@PCREL(0), 1 -; P10-LE-NEXT: pli 4, 252579084 -; P10-LE-NEXT: pli 5, 185207048 -; P10-LE-NEXT: rldimi 5, 4, 32, 0 -; P10-LE-NEXT: std 5, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <16 x i8> , ptr %a, align 1 @@ -680,38 +646,25 @@ ; P8-BE-LABEL: foo9_int_be_reuse8B: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C8(2) # %const.0 -; P8-BE-NEXT: lis 5, 1 -; P8-BE-NEXT: ori 5, 5, 515 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: rldic 4, 5, 32, 15 -; P8-BE-NEXT: oris 4, 4, 1029 -; P8-BE-NEXT: ori 4, 4, 1543 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: 
foo9_int_be_reuse8B: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C8(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 1 -; P9-BE-NEXT: ori 4, 4, 515 -; P9-BE-NEXT: rldic 4, 4, 32, 15 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: oris 4, 4, 1029 -; P9-BE-NEXT: ori 4, 4, 1543 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo9_int_be_reuse8B: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: ld 4, L..C8(2) # %const.0 -; P10-BE-NEXT: pli 5, 67438087 ; P10-BE-NEXT: lxv 0, 0(4) -; P10-BE-NEXT: pli 4, 66051 -; P10-BE-NEXT: rldimi 5, 4, 32, 0 -; P10-BE-NEXT: std 5, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo9_int_be_reuse8B: @@ -805,15 +758,11 @@ ; P8-LE-LABEL: foo10_int_le_reuse8B: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha -; P8-LE-NEXT: lis 5, 3599 ; P8-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l -; P8-LE-NEXT: ori 5, 5, 3085 ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: rldic 4, 5, 32, 4 -; P8-LE-NEXT: oris 4, 4, 2571 -; P8-LE-NEXT: ori 4, 4, 2057 +; P8-LE-NEXT: xxswapd 1, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 1, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo10_int_le_reuse8B: @@ -821,23 +770,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3599 -; P9-LE-NEXT: ori 4, 4, 3085 -; P9-LE-NEXT: rldic 4, 4, 32, 4 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: oris 4, 4, 2571 -; P9-LE-NEXT: ori 4, 4, 2057 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo10_int_le_reuse8B: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: plxv 0, .LCPI9_0@PCREL(0), 1 -; P10-LE-NEXT: pli 4, 235867149 -; P10-LE-NEXT: pli 5, 168495113 -; P10-LE-NEXT: rldimi 5, 4, 32, 0 -; P10-LE-NEXT: std 5, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 
2 @@ -856,38 +797,25 @@ ; P8-BE-LABEL: foo11_int_be_reuse8B: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C10(2) # %const.0 -; P8-BE-NEXT: lis 5, 1 -; P8-BE-NEXT: ori 5, 5, 515 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: rldic 4, 5, 32, 15 -; P8-BE-NEXT: oris 4, 4, 1029 -; P8-BE-NEXT: ori 4, 4, 1543 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo11_int_be_reuse8B: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C10(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 1 -; P9-BE-NEXT: ori 4, 4, 515 -; P9-BE-NEXT: rldic 4, 4, 32, 15 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: oris 4, 4, 1029 -; P9-BE-NEXT: ori 4, 4, 1543 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo11_int_be_reuse8B: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: ld 4, L..C10(2) # %const.0 -; P10-BE-NEXT: pli 5, 67438087 ; P10-BE-NEXT: lxv 0, 0(4) -; P10-BE-NEXT: pli 4, 66051 -; P10-BE-NEXT: rldimi 5, 4, 32, 0 -; P10-BE-NEXT: std 5, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo11_int_be_reuse8B: @@ -977,15 +905,11 @@ ; P8-LE-LABEL: foo12_int_le_reuse8B: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha -; P8-LE-NEXT: lis 5, 3085 ; P8-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l -; P8-LE-NEXT: ori 5, 5, 3599 ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: rldic 4, 5, 32, 4 -; P8-LE-NEXT: oris 4, 4, 2057 -; P8-LE-NEXT: ori 4, 4, 2571 +; P8-LE-NEXT: xxswapd 1, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 1, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo12_int_le_reuse8B: @@ -993,23 +917,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3085 -; P9-LE-NEXT: ori 4, 4, 3599 -; P9-LE-NEXT: rldic 4, 4, 32, 4 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: oris 4, 4, 2057 -; P9-LE-NEXT: ori 4, 4, 
2571 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo12_int_le_reuse8B: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: plxv 0, .LCPI11_0@PCREL(0), 1 -; P10-LE-NEXT: pli 4, 202182159 -; P10-LE-NEXT: pli 5, 134810123 -; P10-LE-NEXT: rldimi 5, 4, 32, 0 -; P10-LE-NEXT: std 5, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -1024,38 +940,25 @@ ; P8-BE-LABEL: foo13_int_be_reuse8B: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C12(2) # %const.0 -; P8-BE-NEXT: lis 5, 1 -; P8-BE-NEXT: ori 5, 5, 515 ; P8-BE-NEXT: lxvd2x 0, 0, 4 -; P8-BE-NEXT: rldic 4, 5, 32, 15 -; P8-BE-NEXT: oris 4, 4, 1029 -; P8-BE-NEXT: ori 4, 4, 1543 ; P8-BE-NEXT: stxvd2x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo13_int_be_reuse8B: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C12(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 1 -; P9-BE-NEXT: ori 4, 4, 515 -; P9-BE-NEXT: rldic 4, 4, 32, 15 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: oris 4, 4, 1029 -; P9-BE-NEXT: ori 4, 4, 1543 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo13_int_be_reuse8B: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: ld 4, L..C12(2) # %const.0 -; P10-BE-NEXT: pli 5, 67438087 ; P10-BE-NEXT: lxv 0, 0(4) -; P10-BE-NEXT: pli 4, 66051 -; P10-BE-NEXT: rldimi 5, 4, 32, 0 -; P10-BE-NEXT: std 5, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo13_int_be_reuse8B: @@ -1143,15 +1046,11 @@ ; P8-LE-LABEL: foo14_int_le_reuse8B: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha -; P8-LE-NEXT: lis 5, 2057 ; P8-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l -; P8-LE-NEXT: ori 5, 5, 2571 ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: rldic 4, 5, 32, 4 -; P8-LE-NEXT: oris 4, 4, 3085 -; P8-LE-NEXT: ori 4, 4, 3599 +; P8-LE-NEXT: xxswapd 1, 0 ; 
P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 1, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo14_int_le_reuse8B: @@ -1159,23 +1058,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 2057 -; P9-LE-NEXT: ori 4, 4, 2571 -; P9-LE-NEXT: rldic 4, 4, 32, 4 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: oris 4, 4, 3085 -; P9-LE-NEXT: ori 4, 4, 3599 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo14_int_le_reuse8B: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: plxv 0, .LCPI13_0@PCREL(0), 1 -; P10-LE-NEXT: pli 4, 134810123 -; P10-LE-NEXT: pli 5, 202182159 -; P10-LE-NEXT: rldimi 5, 4, 32, 0 -; P10-LE-NEXT: std 5, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <2 x i64> , ptr %a, align 8 @@ -1339,29 +1230,27 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C16(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 16673 -; P8-BE-NEXT: ori 4, 4, 39322 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo17_fp_be_reuse4B: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C16(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16673 -; P9-BE-NEXT: ori 4, 4, 39322 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo17_fp_be_reuse4B: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: ld 4, L..C16(2) # %const.0 ; P10-BE-NEXT: lxv 0, 0(4) -; P10-BE-NEXT: pli 4, 1092721050 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo17_fp_be_reuse4B: @@ -1435,10 +1324,10 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI17_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI17_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; 
P8-LE-NEXT: lis 4, 16675 -; P8-LE-NEXT: ori 4, 4, 13107 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 1, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 1, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo18_fp_le_reuse4B: @@ -1446,18 +1335,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI17_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI17_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16675 -; P9-LE-NEXT: ori 4, 4, 13107 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo18_fp_le_reuse4B: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: plxv 0, .LCPI17_0@PCREL(0), 1 -; P10-LE-NEXT: pli 4, 1092825907 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <4 x float> , ptr %a, align 4 @@ -1471,30 +1359,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C18(2) # %const.0 ; P8-BE-NEXT: lxvd2x 0, 0, 4 -; P8-BE-NEXT: li 4, 4105 -; P8-BE-NEXT: rldic 4, 4, 50, 1 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo19_fp_be_reuse8B: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C18(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: li 4, 4105 -; P9-BE-NEXT: rldic 4, 4, 50, 1 -; P9-BE-NEXT: std 4, 16(3) ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo19_fp_be_reuse8B: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: ld 4, L..C18(2) # %const.0 ; P10-BE-NEXT: lxv 0, 0(4) -; P10-BE-NEXT: li 4, 4105 -; P10-BE-NEXT: rldic 4, 4, 50, 1 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo19_fp_be_reuse8B: @@ -1573,14 +1455,11 @@ ; P8-LE-LABEL: foo20_fp_le_reuse8B: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI19_0@toc@ha -; P8-LE-NEXT: lis 5, 16420 ; P8-LE-NEXT: addi 4, 4, 
.LCPI19_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: ori 4, 5, 13107 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: rlwimi 4, 4, 16, 0, 15 +; P8-LE-NEXT: xxswapd 1, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 1, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo20_fp_le_reuse8B: @@ -1588,22 +1467,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI19_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI19_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16420 -; P9-LE-NEXT: ori 4, 4, 13107 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: rlwimi 4, 4, 16, 0, 15 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo20_fp_le_reuse8B: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: plxv 0, .LCPI19_0@PCREL(0), 1 -; P10-LE-NEXT: pli 4, 1076114227 -; P10-LE-NEXT: pli 5, 858993459 -; P10-LE-NEXT: rldimi 5, 4, 32, 0 -; P10-LE-NEXT: std 5, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <2 x double> , ptr %a, align 8 diff --git a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll --- a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll +++ b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll @@ -78,28 +78,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C1(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo2: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C1(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo2: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: pli 4, 
218434821 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo2: @@ -107,10 +105,9 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo2: @@ -118,18 +115,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo2: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: pli 4, 218434821 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -144,22 +140,20 @@ ; P8-BE-LABEL: foo3: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C2(2) # %const.0 +; P8-BE-NEXT: li 5, 3333 +; P8-BE-NEXT: sth 5, 20(3) ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: stw 4, 16(3) -; P8-BE-NEXT: li 4, 3333 +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: sth 4, 20(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo3: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C2(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: li 4, 3333 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: sth 4, 20(3) @@ -167,9 +161,9 @@ ; ; P10-BE-LABEL: foo3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 218434821 ; 
P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: li 4, 3333 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: sth 4, 20(3) @@ -181,10 +175,9 @@ ; P8-LE-NEXT: li 5, 3333 ; P8-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: sth 5, 20(3) ; P8-LE-NEXT: blr ; @@ -193,9 +186,8 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI2_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: li 4, 3333 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: sth 4, 20(3) @@ -203,9 +195,9 @@ ; ; P10-LE-LABEL: foo3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 218434821 ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: li 4, 3333 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: sth 4, 20(3) @@ -226,31 +218,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo4: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 218434821 ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; 
P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo4: @@ -258,11 +242,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo4: @@ -270,20 +251,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 218434821 ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -303,28 +279,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 5 -; P8-BE-NEXT: ori 4, 4, 5653 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo5: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C4(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 5 -; P9-BE-NEXT: ori 4, 4, 5653 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo5: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 333333 -; P10-BE-NEXT: pli 4, 333333 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo5: @@ -332,10 +306,9 @@ ; P8-LE-NEXT: addis 4, 2, 
.LCPI4_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 5 -; P8-LE-NEXT: ori 4, 4, 5653 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo5: @@ -343,18 +316,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 5 -; P9-LE-NEXT: ori 4, 4, 5653 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo5: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 333333 -; P10-LE-NEXT: pli 4, 333333 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -368,31 +340,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 5 -; P8-BE-NEXT: ori 4, 4, 5653 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo6: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 5 -; P9-BE-NEXT: ori 4, 4, 5653 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo6: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 333333 ; P10-BE-NEXT: xxspltiw 0, 333333 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo6: @@ -400,11 +364,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 5 -; P8-LE-NEXT: ori 4, 4, 5653 -; P8-LE-NEXT: rldimi 4, 
4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo6: @@ -412,20 +373,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 5 -; P9-LE-NEXT: ori 4, 4, 5653 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo6: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 333333 ; P10-LE-NEXT: xxspltiw 0, 333333 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -441,29 +397,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P8-BE-NEXT: lxvd2x 0, 0, 4 -; P8-BE-NEXT: lis 4, 508 -; P8-BE-NEXT: ori 4, 4, 41045 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo7: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 508 -; P9-BE-NEXT: ori 4, 4, 41045 -; P9-BE-NEXT: std 4, 16(3) ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo7: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxlxor 0, 0, 0 -; P10-BE-NEXT: pli 4, 33333333 ; P10-BE-NEXT: xxsplti32dx 0, 1, 33333333 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo7: @@ -471,10 +422,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI6_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 508 -; P8-LE-NEXT: ori 4, 4, 41045 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo7: @@ -482,19 +431,16 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha ; P9-LE-NEXT: addi 4, 
4, .LCPI6_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 508 -; P9-LE-NEXT: ori 4, 4, 41045 -; P9-LE-NEXT: std 4, 16(3) ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo7: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxlxor 0, 0, 0 -; P10-LE-NEXT: pli 4, 33333333 ; P10-LE-NEXT: xxsplti32dx 0, 1, 33333333 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <2 x i64> , ptr %a, align 8 @@ -508,28 +454,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C7(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 16469 -; P8-BE-NEXT: ori 4, 4, 7864 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo8: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C7(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16469 -; P9-BE-NEXT: ori 4, 4, 7864 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo8: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 1079320248 -; P10-BE-NEXT: pli 4, 1079320248 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo8: @@ -537,10 +481,9 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 16469 -; P8-LE-NEXT: ori 4, 4, 7864 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo8: @@ -548,18 +491,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16469 -; P9-LE-NEXT: ori 4, 4, 7864 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 
0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo8: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 1079320248 -; P10-LE-NEXT: pli 4, 1079320248 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <4 x float> , ptr %a, align 4 @@ -572,52 +514,34 @@ ; P8-BE-LABEL: foo9: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C8(2) # %const.0 -; P8-BE-NEXT: lis 5, 16394 -; P8-BE-NEXT: ori 5, 5, 41943 ; P8-BE-NEXT: lxvd2x 0, 0, 4 -; P8-BE-NEXT: rldic 4, 5, 32, 1 -; P8-BE-NEXT: oris 4, 4, 2621 -; P8-BE-NEXT: ori 4, 4, 28836 ; P8-BE-NEXT: stxvd2x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo9: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C8(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16394 -; P9-BE-NEXT: ori 4, 4, 41943 -; P9-BE-NEXT: rldic 4, 4, 32, 1 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: oris 4, 4, 2621 -; P9-BE-NEXT: ori 4, 4, 28836 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo9: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxsplti32dx 0, 0, 1074439127 -; P10-BE-NEXT: pli 4, 1074439127 -; P10-BE-NEXT: pli 5, 171798692 -; P10-BE-NEXT: rldimi 5, 4, 32, 0 ; P10-BE-NEXT: xxsplti32dx 0, 1, 171798692 -; P10-BE-NEXT: std 5, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo9: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha -; P8-LE-NEXT: lis 5, 16394 ; P8-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l -; P8-LE-NEXT: ori 5, 5, 41943 ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: rldic 4, 5, 32, 1 -; P8-LE-NEXT: oris 4, 4, 2621 -; P8-LE-NEXT: ori 4, 4, 28836 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo9: @@ -625,24 +549,16 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha ; P9-LE-NEXT: addi 4, 
4, .LCPI8_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16394 -; P9-LE-NEXT: ori 4, 4, 41943 -; P9-LE-NEXT: rldic 4, 4, 32, 1 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: oris 4, 4, 2621 -; P9-LE-NEXT: ori 4, 4, 28836 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo9: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxsplti32dx 0, 0, 1074439127 -; P10-LE-NEXT: pli 4, 1074439127 -; P10-LE-NEXT: pli 5, 171798692 -; P10-LE-NEXT: rldimi 5, 4, 32, 0 ; P10-LE-NEXT: xxsplti32dx 0, 1, 171798692 -; P10-LE-NEXT: std 5, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <2 x double> , ptr %a, align 8 @@ -656,31 +572,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C9(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 16469 -; P8-BE-NEXT: ori 4, 4, 7864 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo10: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C9(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16469 -; P9-BE-NEXT: ori 4, 4, 7864 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo10: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 1079320248 ; P10-BE-NEXT: xxspltiw 0, 1079320248 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo10: @@ -688,11 +596,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 16469 -; P8-LE-NEXT: ori 4, 4, 7864 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo10: @@ -700,20 +605,15 @@ ; P9-LE-NEXT: 
addis 4, 2, .LCPI9_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16469 -; P9-LE-NEXT: ori 4, 4, 7864 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo10: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 1079320248 ; P10-LE-NEXT: xxspltiw 0, 1079320248 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <4 x float> , ptr %a, align 4 diff --git a/llvm/test/CodeGen/PowerPC/memset-tail.ll b/llvm/test/CodeGen/PowerPC/memset-tail.ll --- a/llvm/test/CodeGen/PowerPC/memset-tail.ll +++ b/llvm/test/CodeGen/PowerPC/memset-tail.ll @@ -169,59 +169,46 @@ ; P8-BE-LABEL: memsetTailV1B8: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stxsdx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B8: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 ; P9-BE-NEXT: xxspltib 0, 15 -; P9-BE-NEXT: ori 4, 4, 3855 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B8: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B8: ; P8-LE: # %bb.0: # %entry -; P8-LE-NEXT: lis 4, 3855 ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: std 4, 16(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsdx 34, 3, 4 +; 
P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B8: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 ; P9-LE-NEXT: xxspltib 0, 15 -; P9-LE-NEXT: ori 4, 4, 3855 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B8: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 15, i64 24, i1 false) @@ -231,64 +218,49 @@ define dso_local void @memsetTailV1B7(ptr nocapture noundef writeonly %p) local_unnamed_addr { ; P8-BE-LABEL: memsetTailV1B7: ; P8-BE: # %bb.0: # %entry -; P8-BE-NEXT: lis 4, 3855 ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: li 5, 15 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 -; P8-BE-NEXT: stdx 4, 3, 5 +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stxsdx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B7: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: li 5, 15 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: stdx 4, 3, 5 ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: stfd 0, 15(3) ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B7: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: mffprd 4, 0 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B7: ; P8-LE: # %bb.0: # %entry -; P8-LE-NEXT: lis 4, 3855 ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: li 5, 15 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: stdx 4, 3, 5 -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; 
P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsdx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B7: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: li 5, 15 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: stdx 4, 3, 5 ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: stfd 0, 15(3) ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B7: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: mffprd 4, 0 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 15, i64 23, i1 false) @@ -299,52 +271,49 @@ ; P8-BE-LABEL: memsetTailV1B4: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stxsiwx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B4: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: stw 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: stw 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B4: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: lis 4, 3855 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: stw 4, 16(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsiwx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: 
memsetTailV1B4: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: stw 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: stw 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -356,52 +325,49 @@ ; P8-BE-LABEL: memsetTailV1B3: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stxsiwx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 -; P8-BE-NEXT: stw 4, 15(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B3: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: stw 4, 15(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 15 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: stw 4, 15(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 15 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B3: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: lis 4, 3855 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: stw 4, 15(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsiwx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B3: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: stw 4, 15(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 15 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B3: ; 
P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: stw 4, 15(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 15 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -682,30 +648,22 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B8: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 ; P9-BE-NEXT: xxspltib 0, 165 -; P9-BE-NEXT: ori 4, 4, 42405 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B8: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 2779096485 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B8: @@ -713,30 +671,22 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI12_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI12_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B8: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 ; P9-LE-NEXT: xxspltib 0, 165 -; P9-LE-NEXT: ori 4, 4, 42405 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B8: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 2779096485 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 
16(3) ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 165, i64 24, i1 false) @@ -747,66 +697,48 @@ ; P8-BE-LABEL: memset2TailV1B7: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 -; P8-BE-NEXT: lis 5, -23131 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: ori 4, 5, 42405 -; P8-BE-NEXT: li 5, 15 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 -; P8-BE-NEXT: stdx 4, 3, 5 +; P8-BE-NEXT: stfd 0, 15(3) ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B7: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: li 5, 15 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: stdx 4, 3, 5 ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: stfd 0, 15(3) ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B7: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 2779096485 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: mffprd 4, 0 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B7: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha -; P8-LE-NEXT: lis 5, -23131 ; P8-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: ori 4, 5, 42405 -; P8-LE-NEXT: li 5, 15 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: stdx 4, 3, 5 +; P8-LE-NEXT: stfd 0, 15(3) ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B7: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: li 5, 15 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: stdx 4, 3, 5 ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: stfd 0, 15(3) ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B7: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 2779096485 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: xxspltib 0, 165 +; 
P10-LE-NEXT: mffprd 4, 0 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 165, i64 23, i1 false) @@ -818,26 +750,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B4: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: stw 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, -1515870811 -; P10-BE-NEXT: stw 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -846,26 +776,24 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI14_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI14_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B4: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: stw 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, -1515870811 -; P10-LE-NEXT: stw 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -878,26 +806,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 ; 
P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: stw 4, 15(3) +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B3: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: stw 4, 15(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 15 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, -1515870811 -; P10-BE-NEXT: stw 4, 15(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 15 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -906,26 +832,24 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI15_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI15_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: stw 4, 15(3) +; P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B3: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: stw 4, 15(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 15 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, -1515870811 -; P10-LE-NEXT: stw 4, 15(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 15 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: