Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -892,6 +892,8 @@
     SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;
 
+    SDValue findAndReuseSplatForConst(SDNode *N, DAGCombinerInfo &DCI) const;
+
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14832,6 +14832,130 @@
   return Store;
 }
 
+// findAndReuseSplatForConst - Search for a scalar constant store that shares
+// its chain with a store of a constant splat vector whose splat value matches
+// the scalar constant. If one is found, rewrite the scalar store as a store of
+// an element extracted from that vector instead of materializing the constant
+// a second time.
+//
+// \p N must be an ISD::STORE node. Returns the replacement store, or an empty
+// SDValue when no rewrite is performed.
+SDValue
+PPCTargetLowering::findAndReuseSplatForConst(SDNode *N,
+                                             DAGCombinerInfo &DCI) const {
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+
+  // Leave volatile/atomic stores alone, and skip indexed stores: they produce
+  // an extra result that the replacement store below would not provide.
+  if (!ST->isSimple() || !ST->isUnindexed())
+    return SDValue();
+
+  // Only integer constants are handled. Check the dyn_cast result rather than
+  // dereferencing it blindly, so a non-constant operand is rejected cleanly.
+  auto *StoredConst = dyn_cast<ConstantSDNode>(ST->getValue());
+  if (!StoredConst)
+    return SDValue();
+
+  // Expect a simple scalar int/fp value that is stored without truncation.
+  EVT StoreValTy = ST->getValue().getValueType();
+  if (!StoreValTy.isSimple() || StoreValTy.isVector() ||
+      !(StoreValTy.isScalarInteger() || StoreValTy.isFloatingPoint()))
+    return SDValue();
+  const APInt &ConstVal = StoredConst->getAPIntValue();
+  const uint64_t StoreSizeInBits = ST->getMemoryVT().getSizeInBits();
+  if (ConstVal.getBitWidth() != StoreSizeInBits)
+    return SDValue();
+
+  // Check that the store width is supported on this subtarget: 64-bit stores
+  // are always accepted here, 32-bit stores need P8Vector, and 8/16-bit stores
+  // need P9Vector.
+  const bool WidthSupported =
+      StoreSizeInBits == 64 ||
+      (StoreSizeInBits == 32 && Subtarget.hasP8Vector()) ||
+      ((StoreSizeInBits == 16 || StoreSizeInBits == 8) &&
+       Subtarget.hasP9Vector());
+  if (!WidthSupported)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+  SDValue Chain = ST->getChain();
+  SDValue Base = ST->getBasePtr();
+  MachineMemOperand *MMO = ST->getMemOperand();
+  const unsigned StoreSizeInBytes = StoreSizeInBits / 8;
+
+  // Only look at a bounded number of users of the chain node.
+  const unsigned MaxSearchNodes = 16;
+  unsigned NumNodesExplored = 0;
+  SDNode *RootNode = Chain.getNode();
+  for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+       I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
+    auto *OtherStore = dyn_cast<StoreSDNode>(*I);
+    if (!OtherStore || OtherStore == ST)
+      continue;
+
+    SDValue OtherStoredVal = peekThroughBitcasts(OtherStore->getValue());
+    EVT OtherValTy = OtherStoredVal.getValueType();
+    APInt SplatVal;
+    if (!isTypeLegal(OtherValTy) ||
+        !ISD::isConstantSplatVector(OtherStoredVal.getNode(), SplatVal))
+      continue;
+
+    // Splat-extend both values to a common bit width before comparing. Widen a
+    // copy of the scalar constant so a mismatch on this candidate leaves the
+    // value compared against later candidates untouched.
+    APInt WideConst = ConstVal;
+    if (SplatVal.getBitWidth() < WideConst.getBitWidth())
+      SplatVal = APInt::getSplat(WideConst.getBitWidth(), SplatVal);
+    else if (WideConst.getBitWidth() < SplatVal.getBitWidth())
+      WideConst = APInt::getSplat(SplatVal.getBitWidth(), WideConst);
+    if (SplatVal != WideConst)
+      continue;
+
+    // The element extracted for the new store is 64 bits wide. Its type must
+    // differ from the vector's own element type, otherwise we cannot get the
+    // extract_vector_elt node which is expected by the pattern matcher.
+    const bool OtherEltIsInt =
+        OtherValTy.getVectorElementType().isScalarInteger();
+    EVT ElemTy = OtherEltIsInt ? EVT::getFloatingPointVT(64)
+                               : EVT::getIntegerVT(*DAG.getContext(), 64);
+    // FIXME: maybe create match rule for this case.
+    if (StoreSizeInBits == 32 && !OtherEltIsInt)
+      ElemTy = EVT::getFloatingPointVT(64);
+
+    EVT VecTy = EVT::getVectorVT(*DAG.getContext(), ElemTy, 2);
+    SDValue Cast =
+        DAG.getNode(ISD::BITCAST, dl, VecTy, OtherStore->getValue());
+    // getNode may fold the bitcast away; skip this candidate if it did.
+    if (Cast.getOpcode() != ISD::BITCAST)
+      continue;
+    unsigned ElemIdx = Subtarget.isLittleEndian() ? 1 : 0;
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, Cast,
+                              DAG.getVectorIdxConstant(ElemIdx, dl));
+    if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      continue;
+
+    SDValue Store;
+    if (StoreSizeInBytes == 4) {
+      SDValue StoreOps[] = {Chain, Elt, Base};
+      Store = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+                                      DAG.getVTList(MVT::Other), StoreOps,
+                                      StoreValTy, MMO);
+    } else if (StoreSizeInBytes == 8) {
+      Store = DAG.getStore(Chain, dl, Elt, Base, MMO);
+    } else {
+      SDValue StoreOps[] = {Chain, Elt, Base,
+                            DAG.getConstant(StoreSizeInBytes, dl, MVT::i32)};
+      Store = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
+                                      DAG.getVTList(MVT::Other), StoreOps,
+                                      StoreValTy, MMO);
+    }
+    DCI.AddToWorklist(Store.getNode());
+    return Store;
+  }
+  return SDValue();
+}
+
 // Handle DAG combine for STORE (FP_TO_INT F).
 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
@@ -15401,6 +15525,12 @@
             StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
         return expandVSXStoreForLE(N, DCI);
     }
+
+    // Find opportunity to reuse constant from ConstantSplatVector
+    if (Subtarget.hasVSX() && isa<ConstantSDNode>(N->getOperand(1)))
+      if (SDValue Val = findAndReuseSplatForConst(N, DCI))
+        return Val;
+
     break;
   }
   case ISD::LOAD: {
Index: llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll
+++ llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll
@@ -77,33 +77,21 @@
   ; MIR32-NEXT: BLR implicit $lr, implicit $rm, implicit $f1
   ; MIR64-LABEL: name: caller
   ; MIR64: bb.0.entry:
-  ; MIR64-NEXT: renamable $x3 = LI8 2049
+  ; MIR64-NEXT: renamable $x3 = LI8 0
+  ; MIR64-NEXT: renamable $x4 = LI8 2049
+  ; MIR64-NEXT: STD killed renamable $x3, 192, $x1 :: (store (s64), align 4)
+  ; MIR64-NEXT: renamable $x3 = RLDIC killed renamable $x4, 51, 1
   ; MIR64-NEXT: renamable $x4 = LI8 1
-  ; MIR64-NEXT: renamable $x3 = RLDIC killed renamable $x3, 51, 1
-  ; MIR64-NEXT: renamable $x4 = RLDIC killed renamable $x4, 62, 1
   ; MIR64-NEXT: STD killed renamable $x3, 216, $x1 :: (store (s64) into unknown-address + 24, align 4)
   ; MIR64-NEXT: renamable $x3 = LI8 1023
-  ; MIR64-NEXT: STD killed renamable $x4, 208, $x1 :: (store (s64) into unknown-address + 16, align 4)
-  ; MIR64-NEXT: renamable $x5 = LI8 0
+  ; MIR64-NEXT: renamable $x4 = RLDIC killed renamable $x4, 62, 1
   ; MIR64-NEXT: renamable $x3 = RLDIC killed renamable $x3, 52, 2
-  ; MIR64-NEXT: STD renamable $x5, 192, $x1 :: (store (s64), align 4)
+  ; MIR64-NEXT: STD killed renamable $x4, 208, $x1 :: (store (s64) into unknown-address + 16, align 4)
   ; MIR64-NEXT: STD killed renamable $x3, 200, $x1 :: (store (s64) into unknown-address + 8, align 4)
   ; MIR64-NEXT: ADJCALLSTACKDOWN 224, 0, implicit-def dead $r1, implicit $r1
   ; MIR64-NEXT: renamable
$vsl0 = XXLXORz ; MIR64-NEXT: $f1 = XXLXORdpz ; MIR64-NEXT: $f2 = XXLXORdpz - ; MIR64-NEXT: $v2 = XXLXORz - ; MIR64-NEXT: $v3 = XXLXORz - ; MIR64-NEXT: $v4 = XXLXORz - ; MIR64-NEXT: $v5 = XXLXORz - ; MIR64-NEXT: $v6 = XXLXORz - ; MIR64-NEXT: $v7 = XXLXORz - ; MIR64-NEXT: $v8 = XXLXORz - ; MIR64-NEXT: $v9 = XXLXORz - ; MIR64-NEXT: $v10 = XXLXORz - ; MIR64-NEXT: $v11 = XXLXORz - ; MIR64-NEXT: $v12 = XXLXORz - ; MIR64-NEXT: $v13 = XXLXORz ; MIR64-NEXT: $f3 = XXLXORdpz ; MIR64-NEXT: $f4 = XXLXORdpz ; MIR64-NEXT: $f5 = XXLXORdpz @@ -112,25 +100,37 @@ ; MIR64-NEXT: renamable $x3 = LDtocCPT %const.0, $x2 :: (load (s64) from got) ; MIR64-NEXT: $f8 = XXLXORdpz ; MIR64-NEXT: $f9 = XXLXORdpz - ; MIR64-NEXT: renamable $x4 = LI8 160 ; MIR64-NEXT: $f10 = XXLXORdpz - ; MIR64-NEXT: renamable $x6 = LI8 144 - ; MIR64-NEXT: STXVW4X renamable $vsl0, $x1, killed renamable $x4 :: (store (s128), align 8) - ; MIR64-NEXT: renamable $v0 = LXVD2X $zero8, killed renamable $x3 :: (load (s128) from constant-pool) + ; MIR64-NEXT: renamable $x4 = LI8 160 + ; MIR64-NEXT: renamable $v2 = LXVD2X $zero8, killed renamable $x3 :: (load (s128) from constant-pool) ; MIR64-NEXT: $f11 = XXLXORdpz - ; MIR64-NEXT: renamable $x3 = LI8 128 - ; MIR64-NEXT: STXVW4X renamable $vsl0, $x1, killed renamable $x6 :: (store (s128), align 8) + ; MIR64-NEXT: renamable $x3 = LI8 144 + ; MIR64-NEXT: renamable $x5 = LI8 128 + ; MIR64-NEXT: STXVW4X renamable $vsl0, $x1, killed renamable $x4 :: (store (s128), align 8) ; MIR64-NEXT: $f12 = XXLXORdpz ; MIR64-NEXT: renamable $x4 = LI8 80 - ; MIR64-NEXT: STXVW4X killed renamable $vsl0, $x1, killed renamable $x3 :: (store (s128), align 8) - ; MIR64-NEXT: $f13 = XXLXORdpz - ; MIR64-NEXT: STXVD2X killed renamable $v0, $x1, killed renamable $x4 :: (store (s128)) + ; MIR64-NEXT: STXVW4X renamable $vsl0, $x1, killed renamable $x3 :: (store (s128), align 8) ; MIR64-NEXT: renamable $x6 = LI8 512 + ; MIR64-NEXT: $f13 = XXLXORdpz + ; MIR64-NEXT: STXVW4X renamable $vsl0, $x1, killed 
renamable $x5 :: (store (s128), align 8) + ; MIR64-NEXT: STXVD2X killed renamable $v2, $x1, killed renamable $x4 :: (store (s128)) ; MIR64-NEXT: $x3 = LI8 128 ; MIR64-NEXT: $x4 = LI8 256 + ; MIR64-NEXT: $v2 = COPY renamable $vsl0 + ; MIR64-NEXT: $v3 = COPY renamable $vsl0 + ; MIR64-NEXT: $v4 = COPY renamable $vsl0 + ; MIR64-NEXT: $v5 = COPY renamable $vsl0 + ; MIR64-NEXT: $v6 = COPY renamable $vsl0 + ; MIR64-NEXT: $v7 = COPY renamable $vsl0 + ; MIR64-NEXT: $v8 = COPY renamable $vsl0 + ; MIR64-NEXT: $v9 = COPY renamable $vsl0 + ; MIR64-NEXT: $v10 = COPY renamable $vsl0 + ; MIR64-NEXT: $v11 = COPY renamable $vsl0 + ; MIR64-NEXT: $v12 = COPY renamable $vsl0 + ; MIR64-NEXT: $v13 = COPY renamable $vsl0 + ; MIR64-NEXT: STFD renamable $f0, 176, $x1, implicit killed $vsl0 :: (store (s64)) ; MIR64-NEXT: STD killed renamable $x6, 184, $x1 :: (store (s64)) - ; MIR64-NEXT: STD killed renamable $x5, 176, $x1 :: (store (s64)) - ; MIR64-NEXT: BL8_NOP , csr_ppc64_altivec, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $f1, implicit $f2, implicit $v2, implicit $v3, implicit $v4, implicit $v5, implicit killed $v6, implicit killed $v7, implicit killed $v8, implicit killed $v9, implicit killed $v10, implicit killed $v11, implicit killed $v12, implicit killed $v13, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def $f1 + ; MIR64-NEXT: BL8_NOP , csr_ppc64_altivec, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $f1, implicit $f2, implicit $v2, implicit $v3, implicit $v4, implicit $v5, implicit $v6, implicit $v7, implicit $v8, implicit $v9, implicit $v10, implicit $v11, implicit $v12, implicit $v13, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def 
$r1, implicit-def $f1 ; MIR64-NEXT: ADJCALLSTACKUP 224, 0, implicit-def dead $r1, implicit $r1 ; MIR64-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $f1 entry: Index: llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll +++ llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll @@ -83,58 +83,59 @@ ; 64BIT: # %bb.0: # %entry ; 64BIT-NEXT: mflr 0 ; 64BIT-NEXT: stdu 1, -224(1) -; 64BIT-NEXT: li 3, 2049 -; 64BIT-NEXT: std 0, 240(1) -; 64BIT-NEXT: li 4, 1 +; 64BIT-NEXT: li 3, 0 +; 64BIT-NEXT: li 4, 2049 ; 64BIT-NEXT: xxlxor 0, 0, 0 -; 64BIT-NEXT: rldic 3, 3, 51, 1 -; 64BIT-NEXT: rldic 4, 4, 62, 1 -; 64BIT-NEXT: li 5, 0 +; 64BIT-NEXT: li 5, 128 +; 64BIT-NEXT: std 0, 240(1) +; 64BIT-NEXT: std 3, 192(1) ; 64BIT-NEXT: xxlxor 1, 1, 1 +; 64BIT-NEXT: rldic 3, 4, 51, 1 +; 64BIT-NEXT: li 4, 1 +; 64BIT-NEXT: xxlxor 2, 2, 2 +; 64BIT-NEXT: li 6, 512 ; 64BIT-NEXT: std 3, 216(1) ; 64BIT-NEXT: li 3, 1023 -; 64BIT-NEXT: xxlxor 2, 2, 2 -; 64BIT-NEXT: li 6, 144 +; 64BIT-NEXT: xxlxor 3, 3, 3 ; 64BIT-NEXT: rldic 3, 3, 52, 2 -; 64BIT-NEXT: std 4, 208(1) -; 64BIT-NEXT: li 4, 160 -; 64BIT-NEXT: xxlxor 34, 34, 34 +; 64BIT-NEXT: rldic 4, 4, 62, 1 +; 64BIT-NEXT: xxlxor 4, 4, 4 ; 64BIT-NEXT: std 3, 200(1) ; 64BIT-NEXT: ld 3, L..C0(2) # %const.0 -; 64BIT-NEXT: std 5, 192(1) -; 64BIT-NEXT: xxlxor 35, 35, 35 -; 64BIT-NEXT: xxlxor 36, 36, 36 -; 64BIT-NEXT: stxvw4x 0, 1, 4 -; 64BIT-NEXT: li 4, 80 -; 64BIT-NEXT: xxlxor 37, 37, 37 -; 64BIT-NEXT: stxvw4x 0, 1, 6 -; 64BIT-NEXT: li 6, 512 -; 64BIT-NEXT: lxvd2x 32, 0, 3 -; 64BIT-NEXT: xxlxor 38, 38, 38 -; 64BIT-NEXT: li 3, 128 -; 64BIT-NEXT: xxlxor 39, 39, 39 -; 64BIT-NEXT: stxvw4x 0, 1, 3 -; 64BIT-NEXT: xxlxor 40, 40, 40 -; 64BIT-NEXT: xxlxor 41, 41, 41 -; 64BIT-NEXT: stxvd2x 32, 1, 4 -; 64BIT-NEXT: li 4, 256 -; 64BIT-NEXT: std 6, 184(1) -; 64BIT-NEXT: xxlxor 42, 42, 42 -; 64BIT-NEXT: std 5, 176(1) -; 64BIT-NEXT: xxlxor 43, 43, 43 -; 64BIT-NEXT: 
xxlxor 44, 44, 44 -; 64BIT-NEXT: xxlxor 45, 45, 45 -; 64BIT-NEXT: xxlxor 3, 3, 3 -; 64BIT-NEXT: xxlxor 4, 4, 4 +; 64BIT-NEXT: std 4, 208(1) ; 64BIT-NEXT: xxlxor 5, 5, 5 +; 64BIT-NEXT: li 4, 160 ; 64BIT-NEXT: xxlxor 6, 6, 6 +; 64BIT-NEXT: stxvw4x 0, 1, 4 +; 64BIT-NEXT: li 4, 80 ; 64BIT-NEXT: xxlxor 7, 7, 7 +; 64BIT-NEXT: lxvd2x 34, 0, 3 +; 64BIT-NEXT: li 3, 144 ; 64BIT-NEXT: xxlxor 8, 8, 8 ; 64BIT-NEXT: xxlxor 9, 9, 9 +; 64BIT-NEXT: stxvw4x 0, 1, 3 +; 64BIT-NEXT: li 3, 128 ; 64BIT-NEXT: xxlxor 10, 10, 10 +; 64BIT-NEXT: stxvw4x 0, 1, 5 ; 64BIT-NEXT: xxlxor 11, 11, 11 +; 64BIT-NEXT: stxvd2x 34, 1, 4 +; 64BIT-NEXT: li 4, 256 +; 64BIT-NEXT: std 6, 184(1) ; 64BIT-NEXT: xxlxor 12, 12, 12 +; 64BIT-NEXT: stfd 0, 176(1) ; 64BIT-NEXT: xxlxor 13, 13, 13 +; 64BIT-NEXT: xxlor 34, 0, 0 +; 64BIT-NEXT: xxlor 35, 0, 0 +; 64BIT-NEXT: xxlor 36, 0, 0 +; 64BIT-NEXT: xxlor 37, 0, 0 +; 64BIT-NEXT: xxlor 38, 0, 0 +; 64BIT-NEXT: xxlor 39, 0, 0 +; 64BIT-NEXT: xxlor 40, 0, 0 +; 64BIT-NEXT: xxlor 41, 0, 0 +; 64BIT-NEXT: xxlor 42, 0, 0 +; 64BIT-NEXT: xxlor 43, 0, 0 +; 64BIT-NEXT: xxlor 44, 0, 0 +; 64BIT-NEXT: xxlor 45, 0, 0 ; 64BIT-NEXT: bl .callee[PR] ; 64BIT-NEXT: nop ; 64BIT-NEXT: addi 1, 1, 224 Index: llvm/test/CodeGen/PowerPC/const-splat-array-init.ll =================================================================== --- llvm/test/CodeGen/PowerPC/const-splat-array-init.ll +++ llvm/test/CodeGen/PowerPC/const-splat-array-init.ll @@ -26,17 +26,17 @@ ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C0(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: li 4, 3333 -; P9-BE-NEXT: sth 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stxsihx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo1: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: li 4, 3333 -; P10-BE-NEXT: sth 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stxsihx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo1: @@ -54,17 +54,17 @@ ; P9-LE-NEXT: 
addis 4, 2, .LCPI0_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: li 4, 3333 -; P9-LE-NEXT: sth 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stxsihx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo1: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: li 4, 3333 -; P10-LE-NEXT: sth 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stxsihx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -78,28 +78,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C1(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo2: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C1(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo2: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: pli 4, 218434821 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo2: @@ -107,10 +105,9 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo2: @@ -118,18 +115,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 
; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo2: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: pli 4, 218434821 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -144,35 +140,33 @@ ; P8-BE-LABEL: foo3: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C2(2) # %const.0 +; P8-BE-NEXT: li 5, 3333 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: stw 4, 16(3) -; P8-BE-NEXT: li 4, 3333 +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: sth 4, 20(3) +; P8-BE-NEXT: stfiwx 0, 3, 4 +; P8-BE-NEXT: sth 5, 20(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo3: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C2(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: stw 4, 16(3) -; P9-BE-NEXT: li 4, 3333 +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: sth 4, 20(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 +; P9-BE-NEXT: li 4, 20 +; P9-BE-NEXT: stxsihx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 218434821 ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: stw 4, 16(3) -; P10-BE-NEXT: li 4, 3333 +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: sth 4, 20(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 +; P10-BE-NEXT: li 4, 20 +; P10-BE-NEXT: stxsihx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo3: @@ -181,10 +175,9 @@ ; P8-LE-NEXT: li 5, 3333 ; P8-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: sth 5, 20(3) ; P8-LE-NEXT: blr ; @@ -193,22 +186,21 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI2_0@toc@ha ; 
P9-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: stw 4, 16(3) -; P9-LE-NEXT: li 4, 3333 +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: sth 4, 20(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 +; P9-LE-NEXT: li 4, 20 +; P9-LE-NEXT: stxsihx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 218434821 ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: stw 4, 16(3) -; P10-LE-NEXT: li 4, 3333 +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: sth 4, 20(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 +; P10-LE-NEXT: li 4, 20 +; P10-LE-NEXT: stxsihx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -226,31 +218,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 3333 -; P8-BE-NEXT: ori 4, 4, 3333 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo4: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 3333 -; P9-BE-NEXT: ori 4, 4, 3333 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 218434821 ; P10-BE-NEXT: xxspltiw 0, 218434821 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo4: @@ -258,11 +242,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 3333 -; P8-LE-NEXT: ori 4, 4, 3333 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: 
blr ; ; P9-LE-LABEL: foo4: @@ -270,20 +251,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 3333 -; P9-LE-NEXT: ori 4, 4, 3333 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 218434821 ; P10-LE-NEXT: xxspltiw 0, 218434821 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <8 x i16> , ptr %a, align 2 @@ -303,28 +279,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 5 -; P8-BE-NEXT: ori 4, 4, 5653 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo5: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C4(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 5 -; P9-BE-NEXT: ori 4, 4, 5653 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo5: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 333333 -; P10-BE-NEXT: pli 4, 333333 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo5: @@ -332,10 +306,9 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 5 -; P8-LE-NEXT: ori 4, 4, 5653 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo5: @@ -343,18 +316,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; 
P9-LE-NEXT: lis 4, 5 -; P9-LE-NEXT: ori 4, 4, 5653 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo5: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 333333 -; P10-LE-NEXT: pli 4, 333333 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -368,31 +340,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 5 -; P8-BE-NEXT: ori 4, 4, 5653 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo6: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 5 -; P9-BE-NEXT: ori 4, 4, 5653 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo6: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 333333 ; P10-BE-NEXT: xxspltiw 0, 333333 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo6: @@ -400,11 +364,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 5 -; P8-LE-NEXT: ori 4, 4, 5653 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo6: @@ -412,20 +373,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 5 -; P9-LE-NEXT: ori 4, 4, 5653 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: 
stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo6: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 333333 ; P10-LE-NEXT: xxspltiw 0, 333333 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <4 x i32> , ptr %a, align 4 @@ -441,29 +397,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P8-BE-NEXT: lxvd2x 0, 0, 4 -; P8-BE-NEXT: lis 4, 508 -; P8-BE-NEXT: ori 4, 4, 41045 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo7: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 508 -; P9-BE-NEXT: ori 4, 4, 41045 -; P9-BE-NEXT: std 4, 16(3) ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo7: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxlxor 0, 0, 0 -; P10-BE-NEXT: pli 4, 33333333 ; P10-BE-NEXT: xxsplti32dx 0, 1, 33333333 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo7: @@ -471,10 +422,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI6_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 508 -; P8-LE-NEXT: ori 4, 4, 41045 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo7: @@ -482,19 +431,16 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI6_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 508 -; P9-LE-NEXT: ori 4, 4, 41045 -; P9-LE-NEXT: std 4, 16(3) ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo7: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxlxor 0, 0, 0 -; P10-LE-NEXT: pli 4, 33333333 ; P10-LE-NEXT: xxsplti32dx 0, 1, 33333333 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; 
P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <2 x i64> , ptr %a, align 8 @@ -508,28 +454,26 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C7(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 16469 -; P8-BE-NEXT: ori 4, 4, 7864 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 ; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo8: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C7(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16469 -; P9-BE-NEXT: ori 4, 4, 7864 -; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 16 ; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo8: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxspltiw 0, 1079320248 -; P10-BE-NEXT: pli 4, 1079320248 -; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 16 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo8: @@ -537,10 +481,9 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 16469 -; P8-LE-NEXT: ori 4, 4, 7864 +; P8-LE-NEXT: li 4, 16 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo8: @@ -548,18 +491,17 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16469 -; P9-LE-NEXT: ori 4, 4, 7864 -; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 16 ; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo8: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxspltiw 0, 1079320248 -; P10-LE-NEXT: pli 4, 1079320248 -; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 16 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: blr entry: store <4 x float> , ptr %a, align 4 @@ -572,52 +514,34 @@ ; P8-BE-LABEL: foo9: ; P8-BE: # %bb.0: # %entry 
; P8-BE-NEXT: ld 4, L..C8(2) # %const.0 -; P8-BE-NEXT: lis 5, 16394 -; P8-BE-NEXT: ori 5, 5, 41943 ; P8-BE-NEXT: lxvd2x 0, 0, 4 -; P8-BE-NEXT: rldic 4, 5, 32, 1 -; P8-BE-NEXT: oris 4, 4, 2621 -; P8-BE-NEXT: ori 4, 4, 28836 ; P8-BE-NEXT: stxvd2x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo9: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C8(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16394 -; P9-BE-NEXT: ori 4, 4, 41943 -; P9-BE-NEXT: rldic 4, 4, 32, 1 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: oris 4, 4, 2621 -; P9-BE-NEXT: ori 4, 4, 28836 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo9: ; P10-BE: # %bb.0: # %entry ; P10-BE-NEXT: xxsplti32dx 0, 0, 1074439127 -; P10-BE-NEXT: pli 4, 1074439127 -; P10-BE-NEXT: pli 5, 171798692 -; P10-BE-NEXT: rldimi 5, 4, 32, 0 ; P10-BE-NEXT: xxsplti32dx 0, 1, 171798692 -; P10-BE-NEXT: std 5, 16(3) ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo9: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha -; P8-LE-NEXT: lis 5, 16394 ; P8-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l -; P8-LE-NEXT: ori 5, 5, 41943 ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: rldic 4, 5, 32, 1 -; P8-LE-NEXT: oris 4, 4, 2621 -; P8-LE-NEXT: ori 4, 4, 28836 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo9: @@ -625,24 +549,16 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16394 -; P9-LE-NEXT: ori 4, 4, 41943 -; P9-LE-NEXT: rldic 4, 4, 32, 1 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: oris 4, 4, 2621 -; P9-LE-NEXT: ori 4, 4, 28836 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo9: ; P10-LE: # %bb.0: # %entry ; P10-LE-NEXT: xxsplti32dx 0, 0, 1074439127 -; P10-LE-NEXT: pli 4, 
1074439127 -; P10-LE-NEXT: pli 5, 171798692 -; P10-LE-NEXT: rldimi 5, 4, 32, 0 ; P10-LE-NEXT: xxsplti32dx 0, 1, 171798692 -; P10-LE-NEXT: std 5, 16(3) ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <2 x double> , ptr %a, align 8 @@ -656,31 +572,23 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C9(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, 16469 -; P8-BE-NEXT: ori 4, 4, 7864 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: foo10: ; P9-BE: # %bb.0: # %entry ; P9-BE-NEXT: ld 4, L..C9(2) # %const.0 ; P9-BE-NEXT: lxv 0, 0(4) -; P9-BE-NEXT: lis 4, 16469 -; P9-BE-NEXT: ori 4, 4, 7864 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: foo10: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 1079320248 ; P10-BE-NEXT: xxspltiw 0, 1079320248 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 ; P10-BE-NEXT: stxv 0, 0(3) -; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: foo10: @@ -688,11 +596,8 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, 16469 -; P8-LE-NEXT: ori 4, 4, 7864 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 ; P8-LE-NEXT: stxvd2x 0, 0, 3 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: foo10: @@ -700,20 +605,15 @@ ; P9-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha ; P9-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l ; P9-LE-NEXT: lxv 0, 0(4) -; P9-LE-NEXT: lis 4, 16469 -; P9-LE-NEXT: ori 4, 4, 7864 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: foo10: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 1079320248 ; P10-LE-NEXT: xxspltiw 0, 1079320248 -; 
P10-LE-NEXT: rldimi 4, 4, 32, 0 ; P10-LE-NEXT: stxv 0, 0(3) -; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: store <4 x float> , ptr %a, align 4 Index: llvm/test/CodeGen/PowerPC/memset-tail.ll =================================================================== --- llvm/test/CodeGen/PowerPC/memset-tail.ll +++ llvm/test/CodeGen/PowerPC/memset-tail.ll @@ -169,59 +169,46 @@ ; P8-BE-LABEL: memsetTailV1B8: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stxsdx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B8: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 ; P9-BE-NEXT: xxspltib 0, 15 -; P9-BE-NEXT: ori 4, 4, 3855 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B8: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B8: ; P8-LE: # %bb.0: # %entry -; P8-LE-NEXT: lis 4, 3855 ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: std 4, 16(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsdx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B8: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 ; P9-LE-NEXT: xxspltib 0, 15 -; P9-LE-NEXT: ori 4, 4, 3855 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B8: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: 
pli 4, 252645135 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 15, i64 24, i1 false) @@ -231,64 +218,47 @@ define dso_local void @memsetTailV1B7(ptr nocapture noundef writeonly %p) local_unnamed_addr { ; P8-BE-LABEL: memsetTailV1B7: ; P8-BE: # %bb.0: # %entry -; P8-BE-NEXT: lis 4, 3855 ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: li 5, 15 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 -; P8-BE-NEXT: stdx 4, 3, 5 +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stxsdx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B7: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: li 5, 15 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: stdx 4, 3, 5 ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: stfd 0, 15(3) ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B7: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: xxspltib 0, 15 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: pstfd 0, 15(3), 0 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B7: ; P8-LE: # %bb.0: # %entry -; P8-LE-NEXT: lis 4, 3855 ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: li 5, 15 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: stdx 4, 3, 5 -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsdx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B7: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: li 5, 15 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: stdx 4, 3, 5 ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: stfd 0, 15(3) ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; 
; P10-LE-LABEL: memsetTailV1B7: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: xxspltib 0, 15 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: pstfd 0, 15(3), 0 ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 15, i64 23, i1 false) @@ -299,52 +269,49 @@ ; P8-BE-LABEL: memsetTailV1B4: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: stxsiwx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B4: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: stw 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: stw 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B4: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: lis 4, 3855 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: stw 4, 16(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsiwx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B4: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: stw 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: stw 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stfiwx 0, 3, 4 
; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -356,52 +323,49 @@ ; P8-BE-LABEL: memsetTailV1B3: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: vspltisb 2, 15 -; P8-BE-NEXT: lis 4, 3855 -; P8-BE-NEXT: ori 4, 4, 3855 +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stxsiwx 34, 3, 4 ; P8-BE-NEXT: stxvw4x 34, 0, 3 -; P8-BE-NEXT: stw 4, 15(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memsetTailV1B3: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, 3855 -; P9-BE-NEXT: ori 4, 4, 3855 -; P9-BE-NEXT: stw 4, 15(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 15 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 252645135 -; P10-BE-NEXT: stw 4, 15(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 15 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memsetTailV1B3: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: vspltisb 2, 15 -; P8-LE-NEXT: lis 4, 3855 -; P8-LE-NEXT: ori 4, 4, 3855 -; P8-LE-NEXT: stw 4, 15(3) -; P8-LE-NEXT: stxvd2x 34, 0, 3 +; P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: xxswapd 0, 34 +; P8-LE-NEXT: stxsiwx 34, 3, 4 +; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memsetTailV1B3: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, 3855 -; P9-LE-NEXT: ori 4, 4, 3855 -; P9-LE-NEXT: stw 4, 15(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 15 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 252645135 -; P10-LE-NEXT: stw 4, 15(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 15 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -420,17 +384,17 @@ ; ; P9-BE-LABEL: memsetTailV1B2: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, 3855 -; P9-BE-NEXT: sth 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stxsihx 0, 3, 4 ; P9-BE-NEXT: stxv 
0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B2: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, 3855 -; P10-BE-NEXT: sth 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stxsihx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -444,17 +408,17 @@ ; ; P9-LE-LABEL: memsetTailV1B2: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, 3855 -; P9-LE-NEXT: sth 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stxsihx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B2: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, 3855 -; P10-LE-NEXT: sth 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stxsihx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -473,17 +437,17 @@ ; ; P9-BE-LABEL: memsetTailV1B1: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, 15 -; P9-BE-NEXT: stb 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 15 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stxsibx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memsetTailV1B1: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, 15 -; P10-BE-NEXT: stb 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 15 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stxsibx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -497,17 +461,17 @@ ; ; P9-LE-LABEL: memsetTailV1B1: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, 15 -; P9-LE-NEXT: stb 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 15 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stxsibx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memsetTailV1B1: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, 15 -; P10-LE-NEXT: stb 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 15 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stxsibx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -682,30 +646,22 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C3(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; 
P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 +; P8-BE-NEXT: stfd 0, 16(3) ; P8-BE-NEXT: stxvw4x 0, 0, 3 -; P8-BE-NEXT: std 4, 16(3) ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B8: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 ; P9-BE-NEXT: xxspltib 0, 165 -; P9-BE-NEXT: ori 4, 4, 42405 ; P9-BE-NEXT: stxv 0, 0(3) -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stfd 0, 16(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B8: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 2779096485 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: std 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: stfd 0, 16(3) ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B8: @@ -713,30 +669,22 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI12_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI12_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: stfd 0, 16(3) ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B8: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 ; P9-LE-NEXT: xxspltib 0, 165 -; P9-LE-NEXT: ori 4, 4, 42405 ; P9-LE-NEXT: stxv 0, 0(3) -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stfd 0, 16(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B8: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 2779096485 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: std 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: stfd 0, 16(3) ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 165, i64 24, i1 false) @@ -747,66 +695,46 @@ ; P8-BE-LABEL: memset2TailV1B7: ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 -; P8-BE-NEXT: lis 5, -23131 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: ori 4, 5, 42405 -; P8-BE-NEXT: li 5, 15 -; P8-BE-NEXT: rldimi 4, 4, 32, 0 -; 
P8-BE-NEXT: stdx 4, 3, 5 +; P8-BE-NEXT: stfd 0, 15(3) ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B7: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: li 5, 15 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: rldimi 4, 4, 32, 0 -; P9-BE-NEXT: stdx 4, 3, 5 ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: stfd 0, 15(3) ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B7: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, 2779096485 -; P10-BE-NEXT: rldimi 4, 4, 32, 0 -; P10-BE-NEXT: pstd 4, 15(3), 0 ; P10-BE-NEXT: xxspltib 0, 165 ; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: pstfd 0, 15(3), 0 ; P10-BE-NEXT: blr ; ; P8-LE-LABEL: memset2TailV1B7: ; P8-LE: # %bb.0: # %entry ; P8-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha -; P8-LE-NEXT: lis 5, -23131 ; P8-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: ori 4, 5, 42405 -; P8-LE-NEXT: li 5, 15 -; P8-LE-NEXT: rldimi 4, 4, 32, 0 -; P8-LE-NEXT: stdx 4, 3, 5 +; P8-LE-NEXT: stfd 0, 15(3) ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B7: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: li 5, 15 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: rldimi 4, 4, 32, 0 -; P9-LE-NEXT: stdx 4, 3, 5 ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: stfd 0, 15(3) ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B7: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, 2779096485 -; P10-LE-NEXT: rldimi 4, 4, 32, 0 -; P10-LE-NEXT: pstd 4, 15(3), 0 ; P10-LE-NEXT: xxspltib 0, 165 ; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: pstfd 0, 15(3), 0 ; P10-LE-NEXT: blr entry: tail call void @llvm.memset.p0.i64(ptr %p, i8 165, i64 23, i1 false) @@ -818,26 +746,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C5(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 16 +; P8-BE-NEXT: 
stfiwx 0, 3, 4 ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B4: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: stw 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B4: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, -1515870811 -; P10-BE-NEXT: stw 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -846,26 +772,24 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI14_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI14_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: li 4, 16 +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B4: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: stw 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B4: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, -1515870811 -; P10-LE-NEXT: stw 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -878,26 +802,24 @@ ; P8-BE: # %bb.0: # %entry ; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 ; P8-BE-NEXT: lxvw4x 0, 0, 4 -; P8-BE-NEXT: lis 4, -23131 -; P8-BE-NEXT: ori 4, 4, 42405 -; P8-BE-NEXT: stw 4, 15(3) +; P8-BE-NEXT: li 4, 15 +; P8-BE-NEXT: stfiwx 0, 3, 4 ; P8-BE-NEXT: stxvw4x 0, 0, 3 ; P8-BE-NEXT: blr ; ; P9-BE-LABEL: memset2TailV1B3: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: lis 4, -23131 -; P9-BE-NEXT: ori 4, 4, 42405 -; P9-BE-NEXT: stw 4, 15(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 15 +; 
P9-BE-NEXT: stfiwx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B3: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: pli 4, -1515870811 -; P10-BE-NEXT: stw 4, 15(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 15 +; P10-BE-NEXT: stfiwx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -906,26 +828,24 @@ ; P8-LE-NEXT: addis 4, 2, .LCPI15_0@toc@ha ; P8-LE-NEXT: addi 4, 4, .LCPI15_0@toc@l ; P8-LE-NEXT: lxvd2x 0, 0, 4 -; P8-LE-NEXT: lis 4, -23131 -; P8-LE-NEXT: ori 4, 4, 42405 -; P8-LE-NEXT: stw 4, 15(3) +; P8-LE-NEXT: li 4, 15 +; P8-LE-NEXT: stfiwx 0, 3, 4 ; P8-LE-NEXT: stxvd2x 0, 0, 3 ; P8-LE-NEXT: blr ; ; P9-LE-LABEL: memset2TailV1B3: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: lis 4, -23131 -; P9-LE-NEXT: ori 4, 4, 42405 -; P9-LE-NEXT: stw 4, 15(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 15 +; P9-LE-NEXT: stfiwx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B3: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: pli 4, -1515870811 -; P10-LE-NEXT: stw 4, 15(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 15 +; P10-LE-NEXT: stfiwx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -945,17 +865,17 @@ ; ; P9-BE-LABEL: memset2TailV1B2: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, -23131 -; P9-BE-NEXT: sth 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stxsihx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B2: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, -23131 -; P10-BE-NEXT: sth 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stxsihx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -971,17 +891,17 @@ ; ; P9-LE-LABEL: memset2TailV1B2: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, -23131 -; P9-LE-NEXT: sth 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stxsihx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: 
blr ; ; P10-LE-LABEL: memset2TailV1B2: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, -23131 -; P10-LE-NEXT: sth 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stxsihx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: @@ -1001,17 +921,17 @@ ; ; P9-BE-LABEL: memset2TailV1B1: ; P9-BE: # %bb.0: # %entry -; P9-BE-NEXT: li 4, -91 -; P9-BE-NEXT: stb 4, 16(3) ; P9-BE-NEXT: xxspltib 0, 165 +; P9-BE-NEXT: li 4, 16 +; P9-BE-NEXT: stxsibx 0, 3, 4 ; P9-BE-NEXT: stxv 0, 0(3) ; P9-BE-NEXT: blr ; ; P10-BE-LABEL: memset2TailV1B1: ; P10-BE: # %bb.0: # %entry -; P10-BE-NEXT: li 4, -91 -; P10-BE-NEXT: stb 4, 16(3) ; P10-BE-NEXT: xxspltib 0, 165 +; P10-BE-NEXT: li 4, 16 +; P10-BE-NEXT: stxsibx 0, 3, 4 ; P10-BE-NEXT: stxv 0, 0(3) ; P10-BE-NEXT: blr ; @@ -1027,17 +947,17 @@ ; ; P9-LE-LABEL: memset2TailV1B1: ; P9-LE: # %bb.0: # %entry -; P9-LE-NEXT: li 4, -91 -; P9-LE-NEXT: stb 4, 16(3) ; P9-LE-NEXT: xxspltib 0, 165 +; P9-LE-NEXT: li 4, 16 +; P9-LE-NEXT: stxsibx 0, 3, 4 ; P9-LE-NEXT: stxv 0, 0(3) ; P9-LE-NEXT: blr ; ; P10-LE-LABEL: memset2TailV1B1: ; P10-LE: # %bb.0: # %entry -; P10-LE-NEXT: li 4, -91 -; P10-LE-NEXT: stb 4, 16(3) ; P10-LE-NEXT: xxspltib 0, 165 +; P10-LE-NEXT: li 4, 16 +; P10-LE-NEXT: stxsibx 0, 3, 4 ; P10-LE-NEXT: stxv 0, 0(3) ; P10-LE-NEXT: blr entry: