diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -877,6 +877,8 @@ SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue findAndReuseSplatForConst(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14883,6 +14883,136 @@ return Store; } +// findAndReuseSplatForConst - Search for opportunity that store constant int/fp +// shares the same chain with another store constant vector, which is a splat of +// first store's constant. Convert the first store into store vector element. 
+// Returns the replacement store node (a plain store, PPCISD::STFIWX or
+// PPCISD::STXSIX) when a matching splat store is found among the first few
+// users of the chain, and an empty SDValue when nothing was changed.
+SDValue
+PPCTargetLowering::findAndReuseSplatForConst(SDNode *N,
+                                             DAGCombinerInfo &DCI) const {
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+
+  // Every replacement node built below produces a chain only, so an indexed
+  // store (which also yields the updated pointer) cannot be rewritten here.
+  if (!ST->isUnindexed())
+    return SDValue();
+
+  EVT StoreValTy = ST->getValue().getValueType();
+  if (!StoreValTy.isSimple() || StoreValTy.isVector())
+    return SDValue();
+
+  if (!StoreValTy.isScalarInteger() && !StoreValTy.isFloatingPoint())
+    return SDValue();
+
+  // The stored value has to be an integer constant node. Bail out rather than
+  // dereferencing a null dyn_cast result if a caller passes anything else.
+  auto *ConstNode = dyn_cast<ConstantSDNode>(ST->getValue());
+  if (!ConstNode)
+    return SDValue();
+  APInt ConstVal = ConstNode->getAPIntValue();
+
+  // Truncating stores (memory width differs from the constant) are skipped.
+  const uint64_t StoreSizeInBits = ST->getMemoryVT().getFixedSizeInBits();
+  if (ConstVal.getBitWidth() != StoreSizeInBits)
+    return SDValue();
+
+  // Only 8/16/32/64-bit stores have a lowering below. Listing the sizes
+  // explicitly keeps sub-byte stores (e.g. i1) out of the STXSIX path, whose
+  // byte-count operand (StoreSizeInBits / 8) would otherwise be 0.
+  const bool IsByteOrHalf = StoreSizeInBits == 8 || StoreSizeInBits == 16;
+  if (!IsByteOrHalf && StoreSizeInBits != 32 && StoreSizeInBits != 64)
+    return SDValue();
+
+  // 32-bit stores are emitted as STFIWX and 8/16-bit stores as STXSIX; these
+  // paths are gated on P8 and P9 vector support respectively.
+  if (StoreSizeInBits == 32 && !Subtarget.hasP8Vector())
+    return SDValue();
+
+  if (IsByteOrHalf && !Subtarget.hasP9Vector())
+    return SDValue();
+
+  // A zero constant is never rewritten.
+  if (ConstVal.isZero())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+  SDValue Chain = ST->getChain();
+  SDValue Base = ST->getBasePtr();
+  MachineMemOperand *MMO = ST->getMemOperand();
+
+  // Look at no more than MaxSearchNodes users of the node that produces this
+  // store's chain; skipped users count towards the limit as well.
+  unsigned NumNodesExplored = 0;
+  const unsigned MaxSearchNodes = 4;
+  SDNode *RootNode = Chain.getNode();
+  for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+       I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
+    if (*I == N)
+      continue;
+
+    auto *OtherStore = dyn_cast<StoreSDNode>(*I);
+    if (!OtherStore)
+      continue;
+
+    SDValue OtherStoredVal = peekThroughBitcasts(OtherStore->getValue());
+    APInt SplatVal;
+    if (!isTypeLegal(OtherStoredVal.getValueType()) ||
+        !ISD::isConstantSplatVector(OtherStoredVal.getNode(), SplatVal))
+      continue;
+
+    // The wider of the two constants must be a repetition of the narrower
+    // one, so that the bytes this store writes equal a piece of the vector.
+    const bool SplatIsNarrower =
+        SplatVal.getBitWidth() < ConstVal.getBitWidth();
+    APInt SubVal = SplatIsNarrower ? SplatVal : ConstVal;
+    APInt Val = SplatIsNarrower ? ConstVal : SplatVal;
+    if (Val.trunc(SubVal.getBitWidth()) != SubVal)
+      continue;
+
+    if (!Val.isSplat(SubVal.getBitWidth()))
+      continue;
+
+    EVT ElemTy;
+    // The value bit size for the new store is 64, the stored value type
+    // need be different from vector element type, otherwise cannot get
+    // extract_vector_elt node which is expected by pattern matcher.
+    EVT OtherElemTy = OtherStoredVal.getValueType().getVectorElementType();
+    const bool OtherElemIs64Bit = OtherElemTy.getSizeInBits() == 64;
+    if (OtherElemTy.isScalarInteger()) {
+      ElemTy = !OtherElemIs64Bit ? EVT::getIntegerVT(*DAG.getContext(), 64)
+                                 : EVT::getFloatingPointVT(64);
+    } else {
+      ElemTy = !OtherElemIs64Bit ? EVT::getFloatingPointVT(64)
+                                 : EVT::getIntegerVT(*DAG.getContext(), 64);
+    }
+
+    EVT VecTy = EVT::getVectorVT(*DAG.getContext(), ElemTy, 2);
+    // BITCAST needs source and destination of identical size; skip any legal
+    // vector type that is not as wide as the two-element 64-bit vector.
+    if (OtherStore->getValue().getValueSizeInBits() != VecTy.getSizeInBits())
+      continue;
+
+    SDValue Tmp1 = DAG.getNode(ISD::BITCAST, dl, VecTy, OtherStore->getValue());
+    if (Tmp1.getOpcode() != ISD::BITCAST)
+      continue;
+
+    // NOTE(review): element 1 on little-endian and element 0 on big-endian;
+    // presumably the doubleword that needs no extra permute - confirm.
+    unsigned ElemIdx = Subtarget.isLittleEndian() ? 1 : 0;
+    SDValue Tmp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, Tmp1,
+                               DAG.getVectorIdxConstant(ElemIdx, dl));
+    if (Tmp2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      continue;
+
+    SDValue Store;
+    if (StoreSizeInBits == 32) {
+      SDValue StoreOps[] = {Chain, Tmp2, Base};
+      Store =
+          DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, DAG.getVTList(MVT::Other),
+                                  StoreOps, StoreValTy, MMO);
+    } else if (StoreSizeInBits == 64) {
+      Store = DAG.getStore(Chain, dl, Tmp2, Base, MMO);
+    } else {
+      // 8- and 16-bit stores: the last operand is the store size in bytes.
+      SDValue StoreOps[] = {
+          Chain, Tmp2, Base,
+          DAG.getConstant((StoreSizeInBits / 8), dl, MVT::i32)};
+      Store =
+          DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl, DAG.getVTList(MVT::Other),
+                                  StoreOps, StoreValTy, MMO);
+    }
+    DCI.AddToWorklist(Store.getNode());
+    return Store;
+  }
+  return SDValue();
+}
+
+
 // Handle DAG combine for STORE (FP_TO_INT F).
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const { @@ -15452,6 +15582,12 @@ StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) return expandVSXStoreForLE(N, DCI); } + + // Find opportunity to reuse constant from ConstantSplatVector + if (Subtarget.hasVSX() && isa(N->getOperand(1))) + if (SDValue Val = findAndReuseSplatForConst(N, DCI)) + return Val; + break; } case ISD::LOAD: { diff --git a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll --- a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll +++ b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll @@ -25,18 +25,22 @@ ; P9-BE-LABEL: foo1: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C0(2) # %const.0 +; P9-BE-NEXT: li 5, 16 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: li 4, 3333 -; P9-BE-NEXT: sth 4, 16(3) +; P9-BE-NEXT: mffprd 4, 0 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: mtfprd 0, 4 +; P9-BE-NEXT: stxsihx 0, 3, 5 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo1: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: li 4, 3333 -; P10-BE-NEXT: sth 4, 16(3) +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: mtfprd 0, 4 +; P10-BE-NEXT: stxsihx 0, 3, 5 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo1: @@ -52,19 +56,23 @@ ; P9-LE-LABEL: foo1: ; P9-LE: # %bb.0: # %entry ; P9-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha +; P9-LE-NEXT: li 5, 16 ; P9-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: li 4, 3333 -; P9-LE-NEXT: sth 4, 16(3) +; P9-LE-NEXT: mffprd 4, 0 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: mtfprd 0, 4 +; P9-LE-NEXT: stxsihx 0, 3, 5 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo1: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: li 4, 3333 -; P10-LE-NEXT: sth 4, 16(3) +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: mtfprd 0, 4 +; P10-LE-NEXT: stxsihx 0, 3, 5 ; 
P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -77,59 +85,67 @@ ; P8-BE-LABEL: foo2: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C1(2) # %const.0 +; P8-BE-NEXT: li 5, 16 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: mffprd 4, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: mtfprd 1, 4 +; P8-BE-NEXT: stfiwx 1, 3, 5 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo2: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C1(2) # %const.0 +; P9-BE-NEXT: li 5, 16 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: mffprd 4, 0 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: mtfprd 0, 4 +; P9-BE-NEXT: stfiwx 0, 3, 5 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo2: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: pli 4, 218434821 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: mtfprd 0, 4 +; P10-BE-NEXT: stfiwx 0, 3, 5 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo2: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha +; P8-LE-NEXT: li 5, 16 ; P8-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 +; P8-LE-NEXT: mffprd 4, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: mtfprd 1, 4 +; P8-LE-NEXT: stfiwx 1, 3, 5 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo2: ; P9-LE: # %bb.0: # %entry ; P9-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha +; P9-LE-NEXT: li 5, 16 ; P9-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: mffprd 4, 0 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: mtfprd 0, 4 +; P9-LE-NEXT: stfiwx 0, 3, 5 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo2: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 218434821 -; 
P10-LE-NEXT: pli 4, 218434821 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: mtfprd 0, 4 +; P10-LE-NEXT: stfiwx 0, 3, 5 ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -144,71 +160,79 @@ ; P8-BE-LABEL: foo3: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C2(2) # %const.0 +; P8-BE-NEXT: li 5, 16 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: stw 4, 16(3) -; P8-BE-NEXT: li 4, 3333 +; P8-BE-NEXT: mffprd 4, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: mtfprd 1, 4 +; P8-BE-NEXT: li 4, 3333 +; P8-BE-NEXT: stfiwx 1, 3, 5 ; P8-BE-NEXT: sth 4, 20(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo3: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C2(2) # %const.0 +; P9-BE-NEXT: li 5, 16 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: stw 4, 16(3) -; P9-BE-NEXT: li 4, 3333 +; P9-BE-NEXT: mffprd 4, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: sth 4, 20(3) +; P9-BE-NEXT: mtfprd 0, 4 +; P9-BE-NEXT: li 4, 20 +; P9-BE-NEXT: stfiwx 0, 3, 5 +; P9-BE-NEXT: stxsihx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 218434821 ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: stw 4, 16(3) -; P10-BE-NEXT: li 4, 3333 +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: sth 4, 20(3) +; P10-BE-NEXT: mtfprd 0, 4 +; P10-BE-NEXT: li 4, 20 +; P10-BE-NEXT: stfiwx 0, 3, 5 +; P10-BE-NEXT: stxsihx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo3: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI2_0@toc@ha -; P8-LE-NEXT: li 5, 3333 +; P8-LE-NEXT: li 5, 16 ; P8-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 +; P8-LE-NEXT: mffprd 4, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) -; P8-LE-NEXT: sth 5, 20(3) +; P8-LE-NEXT: 
mtfprd 1, 4 +; P8-LE-NEXT: li 4, 3333 +; P8-LE-NEXT: stfiwx 1, 3, 5 +; P8-LE-NEXT: sth 4, 20(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo3: ; P9-LE: # %bb.0: # %entry ; P9-LE-NEXT: addis 4, 2, .LCPI2_0@toc@ha +; P9-LE-NEXT: li 5, 16 ; P9-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: stw 4, 16(3) -; P9-LE-NEXT: li 4, 3333 +; P9-LE-NEXT: mffprd 4, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: sth 4, 20(3) +; P9-LE-NEXT: mtfprd 0, 4 +; P9-LE-NEXT: li 4, 20 +; P9-LE-NEXT: stfiwx 0, 3, 5 +; P9-LE-NEXT: stxsihx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 218434821 ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: stw 4, 16(3) -; P10-LE-NEXT: li 4, 3333 +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: sth 4, 20(3) +; P10-LE-NEXT: mtfprd 0, 4 +; P10-LE-NEXT: li 4, 20 +; P10-LE-NEXT: stfiwx 0, 3, 5 +; P10-LE-NEXT: stxsihx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -226,31 +250,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo4: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 218434821 ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo4: @@ 
-258,11 +274,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo4: @@ -270,20 +283,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 218434821 ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -302,59 +310,67 @@ ; P8-BE-LABEL: foo5: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 +; P8-BE-NEXT: li 5, 16 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 5 -; P8-BE-NEXT: ori 4, 4, 5653 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: mffprd 4, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: mtfprd 1, 4 +; P8-BE-NEXT: stfiwx 1, 3, 5 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo5: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C4(2) # %const.0 +; P9-BE-NEXT: li 5, 16 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 5 -; P9-BE-NEXT: ori 4, 4, 5653 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: mffprd 4, 0 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: mtfprd 0, 4 +; P9-BE-NEXT: stfiwx 0, 3, 5 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo5: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 333333 -; P10-BE-NEXT: pli 4, 333333 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: mtfprd 0, 4 +; P10-BE-NEXT: stfiwx 0, 3, 5 ; 
P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo5: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha +; P8-LE-NEXT: li 5, 16 ; P8-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 5 -; P8-LE-NEXT: ori 4, 4, 5653 +; P8-LE-NEXT: mffprd 4, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: mtfprd 1, 4 +; P8-LE-NEXT: stfiwx 1, 3, 5 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo5: ; P9-LE: # %bb.0: # %entry ; P9-LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha +; P9-LE-NEXT: li 5, 16 ; P9-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 5 -; P9-LE-NEXT: ori 4, 4, 5653 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: mffprd 4, 0 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: mtfprd 0, 4 +; P9-LE-NEXT: stfiwx 0, 3, 5 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo5: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 333333 -; P10-LE-NEXT: pli 4, 333333 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: mtfprd 0, 4 +; P10-LE-NEXT: stfiwx 0, 3, 5 ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -368,31 +384,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 5 -; P8-BE-NEXT: ori 4, 4, 5653 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo6: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 5 -; P9-BE-NEXT: ori 4, 4, 5653 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo6: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 333333 ; P10-BE-NEXT: xxspltiw 0, 333333 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 
0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo6: @@ -400,11 +408,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 5 -; P8-LE-NEXT: ori 4, 4, 5653 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo6: @@ -412,20 +417,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 5 -; P9-LE-NEXT: ori 4, 4, 5653 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo6: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 333333 ; P10-LE-NEXT: xxspltiw 0, 333333 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -441,29 +441,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P8-BE-NEXT: lxvd2x 0, 0, 4 -; P8-BE-NEXT: lis 4, 508 -; P8-BE-NEXT: ori 4, 4, 41045 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo7: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 508 -; P9-BE-NEXT: ori 4, 4, 41045 -; P9-BE-NEXT: std 4, 16(3) ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo7: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxlxor 0, 0, 0 -; P10-BE-NEXT: pli 4, 33333333 ; P10-BE-NEXT: xxsplti32dx 0, 1, 33333333 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo7: @@ -471,10 +466,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI6_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; 
P8-LE-NEXT: lis 4, 508 -; P8-LE-NEXT: ori 4, 4, 41045 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo7: @@ -482,19 +475,16 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI6_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 508 -; P9-LE-NEXT: ori 4, 4, 41045 -; P9-LE-NEXT: std 4, 16(3) ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo7: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxlxor 0, 0, 0 -; P10-LE-NEXT: pli 4, 33333333 ; P10-LE-NEXT: xxsplti32dx 0, 1, 33333333 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <2 x i64> , ptr %a, align 8 @@ -508,28 +498,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C7(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 16469 -; P8-BE-NEXT: ori 4, 4, 7864 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo8: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C7(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16469 -; P9-BE-NEXT: ori 4, 4, 7864 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo8: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 1079320248 -; P10-BE-NEXT: pli 4, 1079320248 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo8: @@ -537,10 +525,9 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 16469 -; P8-LE-NEXT: ori 4, 4, 7864 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: 
foo8: @@ -548,18 +535,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16469 -; P9-LE-NEXT: ori 4, 4, 7864 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo8: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 1079320248 -; P10-LE-NEXT: pli 4, 1079320248 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <4 x float> , ptr %a, align 4 @@ -572,52 +558,34 @@ ; P8-BE-LABEL: foo9: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C8(2) # %const.0 -; P8-BE-NEXT: lis 5, 16394 -; P8-BE-NEXT: ori 5, 5, 41943 ; P8-BE-NEXT: lxvd2x 0, 0, 4 -; P8-BE-NEXT: rldic 4, 5, 32, 1 -; P8-BE-NEXT: oris 4, 4, 2621 -; P8-BE-NEXT: ori 4, 4, 28836 ; P8-BE-NEXT: stxvd2x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo9: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C8(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16394 -; P9-BE-NEXT: ori 4, 4, 41943 -; P9-BE-NEXT: rldic 4, 4, 32, 1 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: oris 4, 4, 2621 -; P9-BE-NEXT: ori 4, 4, 28836 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo9: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxsplti32dx 0, 0, 1074439127 -; P10-BE-NEXT: pli 4, 1074439127 -; P10-BE-NEXT: pli 5, 171798692 -; P10-BE-NEXT: rldimi 5, 4, 32, 0 ; P10-BE-NEXT: xxsplti32dx 0, 1, 171798692 -; P10-BE-NEXT: std 5, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo9: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha -; P8-LE-NEXT: lis 5, 16394 ; P8-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l -; P8-LE-NEXT: ori 5, 5, 41943 ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: rldic 4, 5, 32, 1 -; 
P8-LE-NEXT: oris 4, 4, 2621 -; P8-LE-NEXT: ori 4, 4, 28836 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo9: @@ -625,24 +593,16 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16394 -; P9-LE-NEXT: ori 4, 4, 41943 -; P9-LE-NEXT: rldic 4, 4, 32, 1 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: oris 4, 4, 2621 -; P9-LE-NEXT: ori 4, 4, 28836 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo9: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxsplti32dx 0, 0, 1074439127 -; P10-LE-NEXT: pli 4, 1074439127 -; P10-LE-NEXT: pli 5, 171798692 -; P10-LE-NEXT: rldimi 5, 4, 32, 0 ; P10-LE-NEXT: xxsplti32dx 0, 1, 171798692 -; P10-LE-NEXT: std 5, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <2 x double> , ptr %a, align 8 @@ -656,31 +616,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C9(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 16469 -; P8-BE-NEXT: ori 4, 4, 7864 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo10: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C9(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16469 -; P9-BE-NEXT: ori 4, 4, 7864 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo10: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 1079320248 ; P10-BE-NEXT: xxspltiw 0, 1079320248 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo10: @@ -688,11 +640,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; P8-LE-NEXT: 
lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 16469 -; P8-LE-NEXT: ori 4, 4, 7864 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo10: @@ -700,20 +649,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16469 -; P9-LE-NEXT: ori 4, 4, 7864 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo10: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 1079320248 ; P10-LE-NEXT: xxspltiw 0, 1079320248 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <4 x float> , ptr %a, align 4 diff --git a/llvm/test/CodeGen/PowerPC/memset-tail.ll b/llvm/test/CodeGen/PowerPC/memset-tail.ll --- a/llvm/test/CodeGen/PowerPC/memset-tail.ll +++ b/llvm/test/CodeGen/PowerPC/memset-tail.ll @@ -169,59 +169,46 @@ ; P8-BE-LABEL: memsetTailV1B8: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stxsdx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B8: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 ; P9-BE-NEXT: xxspltib 0, 15 -; P9-BE-NEXT: ori 4, 4, 3855 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B8: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B8: ; P8-LE: # %bb.0: # %entry 
-; P8-LE-NEXT: lis 4, 3855 ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: std 4, 16(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsdx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B8: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 ; P9-LE-NEXT: xxspltib 0, 15 -; P9-LE-NEXT: ori 4, 4, 3855 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B8: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 15, i64 24, i1 false) @@ -231,64 +218,49 @@ define dso_local void @memsetTailV1B7(ptr nocapture noundef writeonly %p) local_unnamed_addr { ; P8-BE-LABEL: memsetTailV1B7: ; P8-BE: # %bb.0: # %entry -; P8-BE-NEXT: lis 4, 3855 ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: li 5, 15 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 -; P8-BE-NEXT: stdx 4, 3, 5 +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stxsdx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B7: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: li 5, 15 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: stdx 4, 3, 5 ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: stfd 0, 15(3) ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B7: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: mffprd 4, 0 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: blr ; ; 
P8-LE-LABEL: memsetTailV1B7: ; P8-LE: # %bb.0: # %entry -; P8-LE-NEXT: lis 4, 3855 ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: li 5, 15 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: stdx 4, 3, 5 -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsdx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B7: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: li 5, 15 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: stdx 4, 3, 5 ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: stfd 0, 15(3) ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B7: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: mffprd 4, 0 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 15, i64 23, i1 false) @@ -299,52 +271,61 @@ ; P8-BE-LABEL: memsetTailV1B4: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 5, 16 +; P8-BE-NEXT: mfvsrd 4, 34 +; P8-BE-NEXT: mtfprd 0, 4 +; P8-BE-NEXT: stfiwx 0, 3, 5 ; P8-BE-NEXT: stxvw4x 34, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B4: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: stw 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 5, 16 +; P9-BE-NEXT: mffprd 4, 0 +; P9-BE-NEXT: mtfprd 1, 4 +; P9-BE-NEXT: stfiwx 1, 3, 5 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: stw 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 +; P10-BE-NEXT: mtfprd 1, 4 +; 
P10-BE-NEXT: stfiwx 1, 3, 5 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B4: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: lis 4, 3855 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: stw 4, 16(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 5, 16 +; P8-LE-NEXT: mfvsrd 4, 34 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: mtfprd 1, 4 +; P8-LE-NEXT: stfiwx 1, 3, 5 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B4: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: stw 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 5, 16 +; P9-LE-NEXT: mffprd 4, 0 +; P9-LE-NEXT: mtfprd 1, 4 +; P9-LE-NEXT: stfiwx 1, 3, 5 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: stw 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 +; P10-LE-NEXT: mtfprd 1, 4 +; P10-LE-NEXT: stfiwx 1, 3, 5 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -356,52 +337,61 @@ ; P8-BE-LABEL: memsetTailV1B3: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 +; P8-BE-NEXT: li 5, 15 +; P8-BE-NEXT: mfvsrd 4, 34 +; P8-BE-NEXT: mtfprd 0, 4 +; P8-BE-NEXT: stfiwx 0, 3, 5 ; P8-BE-NEXT: stxvw4x 34, 0, 3 -; P8-BE-NEXT: stw 4, 15(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B3: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: stw 4, 15(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 5, 15 +; P9-BE-NEXT: mffprd 4, 0 +; P9-BE-NEXT: mtfprd 1, 4 +; P9-BE-NEXT: stfiwx 1, 3, 5 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: stw 4, 15(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 5, 15 +; P10-BE-NEXT: mffprd 4, 
0 +; P10-BE-NEXT: mtfprd 1, 4 +; P10-BE-NEXT: stfiwx 1, 3, 5 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B3: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: lis 4, 3855 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: stw 4, 15(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 5, 15 +; P8-LE-NEXT: mfvsrd 4, 34 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: mtfprd 1, 4 +; P8-LE-NEXT: stfiwx 1, 3, 5 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B3: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: stw 4, 15(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 5, 15 +; P9-LE-NEXT: mffprd 4, 0 +; P9-LE-NEXT: mtfprd 1, 4 +; P9-LE-NEXT: stfiwx 1, 3, 5 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: stw 4, 15(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 5, 15 +; P10-LE-NEXT: mffprd 4, 0 +; P10-LE-NEXT: mtfprd 1, 4 +; P10-LE-NEXT: stfiwx 1, 3, 5 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -420,17 +410,21 @@ ; ; P9-BE-LABEL: memsetTailV1B2: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, 3855 -; P9-BE-NEXT: sth 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 5, 16 +; P9-BE-NEXT: mffprd 4, 0 +; P9-BE-NEXT: mtfprd 1, 4 +; P9-BE-NEXT: stxsihx 1, 3, 5 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B2: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, 3855 -; P10-BE-NEXT: sth 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 +; P10-BE-NEXT: mtfprd 1, 4 +; P10-BE-NEXT: stxsihx 1, 3, 5 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -444,17 +438,21 @@ ; ; P9-LE-LABEL: memsetTailV1B2: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, 3855 -; P9-LE-NEXT: sth 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 5, 16 +; P9-LE-NEXT: mffprd 4, 0 +; 
P9-LE-NEXT: mtfprd 1, 4 +; P9-LE-NEXT: stxsihx 1, 3, 5 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B2: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, 3855 -; P10-LE-NEXT: sth 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 +; P10-LE-NEXT: mtfprd 1, 4 +; P10-LE-NEXT: stxsihx 1, 3, 5 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -473,17 +471,21 @@ ; ; P9-BE-LABEL: memsetTailV1B1: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, 15 -; P9-BE-NEXT: stb 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 5, 16 +; P9-BE-NEXT: mffprd 4, 0 +; P9-BE-NEXT: mtfprd 1, 4 +; P9-BE-NEXT: stxsibx 1, 3, 5 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B1: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, 15 -; P10-BE-NEXT: stb 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 +; P10-BE-NEXT: mtfprd 1, 4 +; P10-BE-NEXT: stxsibx 1, 3, 5 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -497,17 +499,21 @@ ; ; P9-LE-LABEL: memsetTailV1B1: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, 15 -; P9-LE-NEXT: stb 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 5, 16 +; P9-LE-NEXT: mffprd 4, 0 +; P9-LE-NEXT: mtfprd 1, 4 +; P9-LE-NEXT: stxsibx 1, 3, 5 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B1: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, 15 -; P10-LE-NEXT: stb 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 +; P10-LE-NEXT: mtfprd 1, 4 +; P10-LE-NEXT: stxsibx 1, 3, 5 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -682,30 +688,22 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: blr ; ; 
P9-BE-LABEL: memset2TailV1B8: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 ; P9-BE-NEXT: xxspltib 0, 165 -; P9-BE-NEXT: ori 4, 4, 42405 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B8: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 2779096485 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B8: @@ -713,30 +711,22 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI12_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI12_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B8: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 ; P9-LE-NEXT: xxspltib 0, 165 -; P9-LE-NEXT: ori 4, 4, 42405 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B8: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 2779096485 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 165, i64 24, i1 false) @@ -747,66 +737,48 @@ ; P8-BE-LABEL: memset2TailV1B7: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 -; P8-BE-NEXT: lis 5, -23131 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: ori 4, 5, 42405 -; P8-BE-NEXT: li 5, 15 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 -; P8-BE-NEXT: stdx 4, 3, 5 +; P8-BE-NEXT: stfd 0, 15(3) ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B7: ; P9-BE: # %bb.0: # %entry -; 
P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: li 5, 15 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: stdx 4, 3, 5 ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: stfd 0, 15(3) ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B7: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 2779096485 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: mffprd 4, 0 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B7: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha -; P8-LE-NEXT: lis 5, -23131 ; P8-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: ori 4, 5, 42405 -; P8-LE-NEXT: li 5, 15 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: stdx 4, 3, 5 +; P8-LE-NEXT: stfd 0, 15(3) ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B7: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: li 5, 15 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: stdx 4, 3, 5 ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: stfd 0, 15(3) ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B7: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 2779096485 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: mffprd 4, 0 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 165, i64 23, i1 false) @@ -817,55 +789,63 @@ ; P8-BE-LABEL: memset2TailV1B4: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C5(2) # %const.0 +; P8-BE-NEXT: li 5, 16 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: mffprd 4, 0 +; P8-BE-NEXT: mtfprd 1, 4 +; P8-BE-NEXT: stfiwx 1, 3, 5 ; P8-BE-NEXT: 
stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B4: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: stw 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 5, 16 +; P9-BE-NEXT: mffprd 4, 0 +; P9-BE-NEXT: mtfprd 1, 4 +; P9-BE-NEXT: stfiwx 1, 3, 5 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, -1515870811 -; P10-BE-NEXT: stw 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 +; P10-BE-NEXT: mtfprd 1, 4 +; P10-BE-NEXT: stfiwx 1, 3, 5 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B4: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI14_0@toc@ha +; P8-LE-NEXT: li 5, 16 ; P8-LE-NEXT: addi 4, 4, .LCPI14_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: mffprd 4, 0 +; P8-LE-NEXT: mtfprd 1, 4 +; P8-LE-NEXT: stfiwx 1, 3, 5 ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B4: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: stw 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 5, 16 +; P9-LE-NEXT: mffprd 4, 0 +; P9-LE-NEXT: mtfprd 1, 4 +; P9-LE-NEXT: stfiwx 1, 3, 5 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, -1515870811 -; P10-LE-NEXT: stw 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 +; P10-LE-NEXT: mtfprd 1, 4 +; P10-LE-NEXT: stfiwx 1, 3, 5 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -877,55 +857,63 @@ ; P8-BE-LABEL: memset2TailV1B3: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 +; P8-BE-NEXT: li 5, 15 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; 
P8-BE-NEXT: stw 4, 15(3) +; P8-BE-NEXT: mffprd 4, 0 +; P8-BE-NEXT: mtfprd 1, 4 +; P8-BE-NEXT: stfiwx 1, 3, 5 ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B3: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: stw 4, 15(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 5, 15 +; P9-BE-NEXT: mffprd 4, 0 +; P9-BE-NEXT: mtfprd 1, 4 +; P9-BE-NEXT: stfiwx 1, 3, 5 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, -1515870811 -; P10-BE-NEXT: stw 4, 15(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 5, 15 +; P10-BE-NEXT: mffprd 4, 0 +; P10-BE-NEXT: mtfprd 1, 4 +; P10-BE-NEXT: stfiwx 1, 3, 5 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B3: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI15_0@toc@ha +; P8-LE-NEXT: li 5, 15 ; P8-LE-NEXT: addi 4, 4, .LCPI15_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: stw 4, 15(3) +; P8-LE-NEXT: mffprd 4, 0 +; P8-LE-NEXT: mtfprd 1, 4 +; P8-LE-NEXT: stfiwx 1, 3, 5 ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B3: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: stw 4, 15(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 5, 15 +; P9-LE-NEXT: mffprd 4, 0 +; P9-LE-NEXT: mtfprd 1, 4 +; P9-LE-NEXT: stfiwx 1, 3, 5 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, -1515870811 -; P10-LE-NEXT: stw 4, 15(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 5, 15 +; P10-LE-NEXT: mffprd 4, 0 +; P10-LE-NEXT: mtfprd 1, 4 +; P10-LE-NEXT: stfiwx 1, 3, 5 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -945,17 +933,21 @@ ; ; P9-BE-LABEL: memset2TailV1B2: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, -23131 -; 
P9-BE-NEXT: sth 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 5, 16 +; P9-BE-NEXT: mffprd 4, 0 +; P9-BE-NEXT: mtfprd 1, 4 +; P9-BE-NEXT: stxsihx 1, 3, 5 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B2: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, -23131 -; P10-BE-NEXT: sth 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 +; P10-BE-NEXT: mtfprd 1, 4 +; P10-BE-NEXT: stxsihx 1, 3, 5 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -971,17 +963,21 @@ ; ; P9-LE-LABEL: memset2TailV1B2: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, -23131 -; P9-LE-NEXT: sth 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 5, 16 +; P9-LE-NEXT: mffprd 4, 0 +; P9-LE-NEXT: mtfprd 1, 4 +; P9-LE-NEXT: stxsihx 1, 3, 5 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B2: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, -23131 -; P10-LE-NEXT: sth 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 +; P10-LE-NEXT: mtfprd 1, 4 +; P10-LE-NEXT: stxsihx 1, 3, 5 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -1001,17 +997,21 @@ ; ; P9-BE-LABEL: memset2TailV1B1: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, -91 -; P9-BE-NEXT: stb 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 5, 16 +; P9-BE-NEXT: mffprd 4, 0 +; P9-BE-NEXT: mtfprd 1, 4 +; P9-BE-NEXT: stxsibx 1, 3, 5 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B1: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, -91 -; P10-BE-NEXT: stb 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 5, 16 +; P10-BE-NEXT: mffprd 4, 0 +; P10-BE-NEXT: mtfprd 1, 4 +; P10-BE-NEXT: stxsibx 1, 3, 5 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -1027,17 +1027,21 @@ ; ; P9-LE-LABEL: memset2TailV1B1: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, -91 -; P9-LE-NEXT: stb 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 5, 16 +; 
P9-LE-NEXT: mffprd 4, 0 +; P9-LE-NEXT: mtfprd 1, 4 +; P9-LE-NEXT: stxsibx 1, 3, 5 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B1: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, -91 -; P10-LE-NEXT: stb 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 5, 16 +; P10-LE-NEXT: mffprd 4, 0 +; P10-LE-NEXT: mtfprd 1, 4 +; P10-LE-NEXT: stxsibx 1, 3, 5 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: