diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -873,6 +873,8 @@ SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue findAndReuseSplatForConst(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15038,6 +15038,125 @@ return Store; } +// Search for opportunity that store constant int/fp shares the same chain with +// another store constant vector, which is a splat of first store's constant. +// Convert the first store into store vector element. +SDValue +PPCTargetLowering::findAndReuseSplatForConst(SDNode *N, + DAGCombinerInfo &DCI) const { + StoreSDNode *ST = cast(N); + // Truncated stores are likely simple constants. Stores that return values are + // out-of-scope. Do not bother reuse. + if (ST->isTruncatingStore() || ST->getNumValues() != 1) + return SDValue(); + + EVT StoreVT = ST->getValue().getValueType(); + if (!StoreVT.isSimple() || StoreVT.isVector()) + return SDValue(); + if (!(StoreVT.isScalarInteger() || StoreVT.isFloatingPoint())) + return SDValue(); + // Expect Byte/Halfword/Word/Doubleword size, and check for supported + // subtarget. + auto StoreSizeInBits = ST->getMemoryVT().getSizeInBits(); + if (!llvm::isPowerOf2_64(StoreSizeInBits) || StoreSizeInBits > 64 || + StoreSizeInBits < 8) + return SDValue(); + if (StoreSizeInBits == 32 && !Subtarget.hasP8Vector()) + return SDValue(); + if ((StoreSizeInBits == 16 || StoreSizeInBits == 8) && + !Subtarget.hasP9Vector()) + return SDValue(); + + APInt ConstVal; + if (auto *Const = dyn_cast(N->getOperand(1))) { + ConstVal = Const->getAPIntValue(); + } else if (auto *Const = dyn_cast(N->getOperand(1))) { + ConstVal = Const->getValueAPF().bitcastToAPInt(); + } else + return SDValue(); + if (ConstVal.isZero() || ConstVal.isAllOnes()) + return SDValue(); + if (ConstVal.getBitWidth() != StoreSizeInBits) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + + SDValue Chain = ST->getChain(); + SDValue Base = ST->getBasePtr(); + MachineMemOperand *MMO = ST->getMemOperand(); + + unsigned NumNodesExplored = 0; + const unsigned MaxSearchNodes = 3; + SDNode *RootNode = Chain.getNode(); + for (auto I = RootNode->use_begin(), E = RootNode->use_end(); + I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) { + if (*I == N) + continue; + + auto *OtherStore = dyn_cast(*I); + if (!OtherStore) + continue; + + SDValue OtherStoredVal = peekThroughBitcasts(OtherStore->getValue()); + APInt SplatVal; + if (!isTypeLegal(OtherStoredVal.getValueType()) || + !ISD::isConstantSplatVector(OtherStoredVal.getNode(), SplatVal)) + continue; + + // Check that the longer of (ConstVal/SplatVal) shall be splat of the + // shorter to allow reuse. + APInt ShorterVal = SplatVal; + APInt LongerVal = ConstVal; + if (ShorterVal.getBitWidth() > LongerVal.getBitWidth()) + std::swap(ShorterVal, LongerVal); + if (LongerVal.trunc(ShorterVal.getBitWidth()) != ShorterVal) + continue; + if (LongerVal.getBitWidth() % ShorterVal.getBitWidth() != 0) + continue; + if (!LongerVal.isSplat(ShorterVal.getBitWidth())) + continue; + + // Element type shall be f64 as expected by STXSIX and STFIWX, and the type + // should be reversed for extract_vector_elt from v2*64 types to allow + // extract_vector_elt node. + EVT ElemTy = EVT::getFloatingPointVT(64); + if (StoreSizeInBits == 64 && StoreVT.isFloatingPoint()) + ElemTy = EVT::getIntegerVT(*DAG.getContext(), 64); + + EVT VecTy = EVT::getVectorVT(*DAG.getContext(), ElemTy, 2); + SDValue Tmp1 = DAG.getNode(ISD::BITCAST, dl, VecTy, OtherStore->getValue()); + if (Tmp1.getOpcode() != ISD::BITCAST) + continue; + + unsigned ElemIdx = Subtarget.isLittleEndian() ? 1 : 0; + SDValue Tmp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, Tmp1, + DAG.getVectorIdxConstant(ElemIdx, dl)); + if (Tmp2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + continue; + + SDValue Store; + if (StoreSizeInBits == 32) { + SDValue StoreOps[] = {Chain, Tmp2, Base}; + Store = + DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, DAG.getVTList(MVT::Other), + StoreOps, StoreVT, MMO); + } else if (StoreSizeInBits == 64) { + Store = DAG.getStore(Chain, dl, Tmp2, Base, MMO); + } else { + SDValue StoreOps[] = { + Chain, Tmp2, Base, + DAG.getConstant((StoreSizeInBits / 8), dl, MVT::i32)}; + Store = + DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl, DAG.getVTList(MVT::Other), + StoreOps, StoreVT, MMO); + } + DCI.AddToWorklist(Store.getNode()); + return Store; + } + return SDValue(); +} + // Handle DAG combine for STORE (FP_TO_INT F). SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const { @@ -15597,6 +15716,12 @@ StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) return expandVSXStoreForLE(N, DCI); } + + // Find opportunity to reuse constant from ConstantSplatVector + if (Subtarget.hasVSX() && isa(N->getOperand(1))) + if (SDValue Val = findAndReuseSplatForConst(N, DCI)) + return Val; + break; } case ISD::LOAD: { diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -4290,6 +4290,8 @@ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), ForceXForm:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), ForceXForm:$dst), (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>; +def : Pat<(PPCstxsix (f64 (vector_extract v2f64:$S, 0)), ForceXForm:$dst, 1), + (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>; // Scalar stores of i16 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), ForceXForm:$dst), @@ -4308,6 +4310,8 @@ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), ForceXForm:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), ForceXForm:$dst), (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>; +def : Pat<(PPCstxsix (f64 (vector_extract v2f64:$S, 0)), ForceXForm:$dst, 2), + (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>; } // HasVSX, HasP9Vector, IsBigEndian // Big endian 64Bit Power9 subtarget. @@ -4584,6 +4588,8 @@ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), ForceXForm:$dst), (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), ForceXForm:$dst)>; +def : Pat<(PPCstxsix (f64 (vector_extract v2f64:$S, 1)), ForceXForm:$dst, 1), + (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>; // Scalar stores of i16 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), ForceXForm:$dst), @@ -4602,6 +4608,8 @@ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), ForceXForm:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), ForceXForm:$dst), (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>; +def : Pat<(PPCstxsix (f64 (vector_extract v2f64:$S, 1)), ForceXForm:$dst, 2), + (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>; defm : ScalToVecWPermute< v2i64, (i64 (load DSForm:$src)), diff --git a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll --- a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll +++ b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll @@ -26,17 +26,17 @@ ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C0(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: li 4, 3333 -; P9-BE-NEXT: sth 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stxsihx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo1: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: li 4, 3333 -; P10-BE-NEXT: sth 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stxsihx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo1: @@ -54,17 +54,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: li 4, 3333 -; P9-LE-NEXT: sth 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stxsihx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo1: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: li 4, 3333 -; P10-LE-NEXT: sth 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stxsihx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -78,28 +78,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C1(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo2: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C1(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo2: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: pli 4, 218434821 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo2: @@ -107,10 +105,9 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo2: @@ -118,18 +115,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo2: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: pli 4, 218434821 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -144,35 +140,33 @@ ; P8-BE-LABEL: foo3: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C2(2) # %const.0 +; P8-BE-NEXT: li 5, 3333 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: stw 4, 16(3) -; P8-BE-NEXT: li 4, 3333 +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: sth 4, 20(3) +; P8-BE-NEXT: stfiwx 0, 3, 4 +; P8-BE-NEXT: sth 5, 20(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo3: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C2(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: stw 4, 16(3) -; P9-BE-NEXT: li 4, 3333 +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: sth 4, 20(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 +; P9-BE-NEXT: li 4, 20 +; P9-BE-NEXT: stxsihx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 218434821 ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: stw 4, 16(3) -; P10-BE-NEXT: li 4, 3333 +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: sth 4, 20(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 +; P10-BE-NEXT: li 4, 20 +; P10-BE-NEXT: stxsihx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo3: @@ -181,10 +175,9 @@ ; P8-LE-NEXT: li 5, 3333 ; P8-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: sth 5, 20(3) ; P8-LE-NEXT: blr ; @@ -193,22 +186,21 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI2_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: stw 4, 16(3) -; P9-LE-NEXT: li 4, 3333 +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: sth 4, 20(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 +; P9-LE-NEXT: li 4, 20 +; P9-LE-NEXT: stxsihx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 218434821 ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: stw 4, 16(3) -; P10-LE-NEXT: li 4, 3333 +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: sth 4, 20(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 +; P10-LE-NEXT: li 4, 20 +; P10-LE-NEXT: stxsihx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -226,31 +218,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo4: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 218434821 ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo4: @@ -258,11 +242,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo4: @@ -270,20 +251,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 218434821 ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -303,28 +279,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 5 -; P8-BE-NEXT: ori 4, 4, 5653 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo5: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C4(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 5 -; P9-BE-NEXT: ori 4, 4, 5653 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo5: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 333333 -; P10-BE-NEXT: pli 4, 333333 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo5: @@ -332,10 +306,9 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 5 -; P8-LE-NEXT: ori 4, 4, 5653 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo5: @@ -343,18 +316,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 5 -; P9-LE-NEXT: ori 4, 4, 5653 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo5: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 333333 -; P10-LE-NEXT: pli 4, 333333 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -368,31 +340,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 5 -; P8-BE-NEXT: ori 4, 4, 5653 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo6: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 5 -; P9-BE-NEXT: ori 4, 4, 5653 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo6: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 333333 ; P10-BE-NEXT: xxspltiw 0, 333333 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo6: @@ -400,11 +364,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 5 -; P8-LE-NEXT: ori 4, 4, 5653 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo6: @@ -412,20 +373,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 5 -; P9-LE-NEXT: ori 4, 4, 5653 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo6: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 333333 ; P10-LE-NEXT: xxspltiw 0, 333333 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -441,29 +397,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P8-BE-NEXT: lxvd2x 0, 0, 4 -; P8-BE-NEXT: lis 4, 508 -; P8-BE-NEXT: ori 4, 4, 41045 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo7: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 508 -; P9-BE-NEXT: ori 4, 4, 41045 -; P9-BE-NEXT: std 4, 16(3) ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo7: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxlxor 0, 0, 0 -; P10-BE-NEXT: pli 4, 33333333 ; P10-BE-NEXT: xxsplti32dx 0, 1, 33333333 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo7: @@ -471,10 +422,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI6_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 508 -; P8-LE-NEXT: ori 4, 4, 41045 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo7: @@ -482,19 +431,16 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI6_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 508 -; P9-LE-NEXT: ori 4, 4, 41045 -; P9-LE-NEXT: std 4, 16(3) ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo7: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxlxor 0, 0, 0 -; P10-LE-NEXT: pli 4, 33333333 ; P10-LE-NEXT: xxsplti32dx 0, 1, 33333333 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <2 x i64> , ptr %a, align 8 @@ -508,28 +454,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C7(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 16469 -; P8-BE-NEXT: ori 4, 4, 7864 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo8: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C7(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16469 -; P9-BE-NEXT: ori 4, 4, 7864 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo8: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 1079320248 -; P10-BE-NEXT: pli 4, 1079320248 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo8: @@ -537,10 +481,9 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 16469 -; P8-LE-NEXT: ori 4, 4, 7864 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo8: @@ -548,18 +491,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16469 -; P9-LE-NEXT: ori 4, 4, 7864 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo8: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 1079320248 -; P10-LE-NEXT: pli 4, 1079320248 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <4 x float> , ptr %a, align 4 @@ -656,31 +598,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C9(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 16469 -; P8-BE-NEXT: ori 4, 4, 7864 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo10: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C9(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16469 -; P9-BE-NEXT: ori 4, 4, 7864 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo10: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 1079320248 ; P10-BE-NEXT: xxspltiw 0, 1079320248 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo10: @@ -688,11 +622,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 16469 -; P8-LE-NEXT: ori 4, 4, 7864 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo10: @@ -700,20 +631,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16469 -; P9-LE-NEXT: ori 4, 4, 7864 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo10: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 1079320248 ; P10-LE-NEXT: xxspltiw 0, 1079320248 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <4 x float> , ptr %a, align 4 diff --git a/llvm/test/CodeGen/PowerPC/memset-tail.ll b/llvm/test/CodeGen/PowerPC/memset-tail.ll --- a/llvm/test/CodeGen/PowerPC/memset-tail.ll +++ b/llvm/test/CodeGen/PowerPC/memset-tail.ll @@ -169,59 +169,46 @@ ; P8-BE-LABEL: memsetTailV1B8: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stxsdx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B8: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 ; P9-BE-NEXT: xxspltib 0, 15 -; P9-BE-NEXT: ori 4, 4, 3855 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B8: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B8: ; P8-LE: # %bb.0: # %entry -; P8-LE-NEXT: lis 4, 3855 ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: std 4, 16(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsdx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B8: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 ; P9-LE-NEXT: xxspltib 0, 15 -; P9-LE-NEXT: ori 4, 4, 3855 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B8: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 15, i64 24, i1 false) @@ -231,64 +218,47 @@ define dso_local void @memsetTailV1B7(ptr nocapture noundef writeonly %p) local_unnamed_addr { ; P8-BE-LABEL: memsetTailV1B7: ; P8-BE: # %bb.0: # %entry -; P8-BE-NEXT: lis 4, 3855 ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: li 5, 15 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 -; P8-BE-NEXT: stdx 4, 3, 5 +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stxsdx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B7: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: li 5, 15 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: stdx 4, 3, 5 ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: stfd 0, 15(3) ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B7: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: xxspltib 0, 15 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: pstfd 0, 15(3), 0 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B7: ; P8-LE: # %bb.0: # %entry -; P8-LE-NEXT: lis 4, 3855 ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: li 5, 15 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: stdx 4, 3, 5 -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsdx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B7: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: li 5, 15 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: stdx 4, 3, 5 ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: stfd 0, 15(3) ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B7: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: xxspltib 0, 15 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: pstfd 0, 15(3), 0 ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 15, i64 23, i1 false) @@ -299,52 +269,49 @@ ; P8-BE-LABEL: memsetTailV1B4: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stxsiwx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B4: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: stw 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: stw 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B4: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: lis 4, 3855 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: stw 4, 16(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsiwx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B4: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: stw 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: stw 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -356,52 +323,49 @@ ; P8-BE-LABEL: memsetTailV1B3: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stxsiwx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 -; P8-BE-NEXT: stw 4, 15(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B3: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: stw 4, 15(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 15 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: stw 4, 15(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 15 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B3: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: lis 4, 3855 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: stw 4, 15(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsiwx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B3: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: stw 4, 15(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 15 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: stw 4, 15(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 15 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -420,17 +384,17 @@ ; ; P9-BE-LABEL: memsetTailV1B2: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, 3855 -; P9-BE-NEXT: sth 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stxsihx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B2: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, 3855 -; P10-BE-NEXT: sth 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stxsihx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -444,17 +408,17 @@ ; ; P9-LE-LABEL: memsetTailV1B2: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, 3855 -; P9-LE-NEXT: sth 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stxsihx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B2: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, 3855 -; P10-LE-NEXT: sth 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stxsihx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -473,17 +437,17 @@ ; ; P9-BE-LABEL: memsetTailV1B1: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, 15 -; P9-BE-NEXT: stb 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stxsibx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B1: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, 15 -; P10-BE-NEXT: stb 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stxsibx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -497,17 +461,17 @@ ; ; P9-LE-LABEL: memsetTailV1B1: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, 15 -; P9-LE-NEXT: stb 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stxsibx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B1: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, 15 -; P10-LE-NEXT: stb 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stxsibx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -682,30 +646,22 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B8: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 ; P9-BE-NEXT: xxspltib 0, 165 -; P9-BE-NEXT: ori 4, 4, 42405 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B8: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 2779096485 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B8: @@ -713,30 +669,22 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI12_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI12_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B8: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 ; P9-LE-NEXT: xxspltib 0, 165 -; P9-LE-NEXT: ori 4, 4, 42405 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B8: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 2779096485 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 165, i64 24, i1 false) @@ -747,66 +695,46 @@ ; P8-BE-LABEL: memset2TailV1B7: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 -; P8-BE-NEXT: lis 5, -23131 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: ori 4, 5, 42405 -; P8-BE-NEXT: li 5, 15 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 -; P8-BE-NEXT: stdx 4, 3, 5 +; P8-BE-NEXT: stfd 0, 15(3) ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B7: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: li 5, 15 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: stdx 4, 3, 5 ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: stfd 0, 15(3) ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B7: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 2779096485 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: xxspltib 0, 165 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: pstfd 0, 15(3), 0 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B7: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha -; P8-LE-NEXT: lis 5, -23131 ; P8-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: ori 4, 5, 42405 -; P8-LE-NEXT: li 5, 15 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: stdx 4, 3, 5 +; P8-LE-NEXT: stfd 0, 15(3) ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B7: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: li 5, 15 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: stdx 4, 3, 5 ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: stfd 0, 15(3) ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B7: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 2779096485 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: xxspltib 0, 165 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: pstfd 0, 15(3), 0 ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 165, i64 23, i1 false) @@ -818,26 +746,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B4: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: stw 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, -1515870811 -; P10-BE-NEXT: stw 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -846,26 +772,24 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI14_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI14_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B4: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: stw 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, -1515870811 -; P10-LE-NEXT: stw 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -878,26 +802,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: stw 4, 15(3) +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B3: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: stw 4, 15(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 15 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, -1515870811 -; P10-BE-NEXT: stw 4, 15(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 15 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -906,26 +828,24 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI15_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI15_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: stw 4, 15(3) +; P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B3: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: stw 4, 15(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 15 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, -1515870811 -; P10-LE-NEXT: stw 4, 15(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 15 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -945,17 +865,17 @@ ; ; P9-BE-LABEL: memset2TailV1B2: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, -23131 -; P9-BE-NEXT: sth 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stxsihx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B2: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, -23131 -; P10-BE-NEXT: sth 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stxsihx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -971,17 +891,17 @@ ; ; P9-LE-LABEL: memset2TailV1B2: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, -23131 -; P9-LE-NEXT: sth 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stxsihx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B2: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, -23131 -; P10-LE-NEXT: sth 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stxsihx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -1001,17 +921,17 @@ ; ; P9-BE-LABEL: memset2TailV1B1: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, -91 -; P9-BE-NEXT: stb 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stxsibx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B1: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, -91 -; P10-BE-NEXT: stb 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stxsibx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -1027,17 +947,17 @@ ; ; P9-LE-LABEL: memset2TailV1B1: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, -91 -; P9-LE-NEXT: stb 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stxsibx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B1: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, -91 -; P10-LE-NEXT: stb 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stxsibx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: