diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12649,6 +12649,100 @@
   return SDValue();
 }
 
+/// Fold a BUILD_VECTOR whose operands are all scalar int-to-fp conversions of
+/// the same signedness into one vector conversion of a BUILD_VECTOR of the
+/// integer inputs. Inputs narrower than the result element are sign- or
+/// zero-extended to match; wider inputs are not handled.
+/// \returns the replacement node, or an empty SDValue if nothing was done.
+// FIXME: If the values originally come from up to two vectors of integers, we
+// can emit an int-to-fp conversion of a shuffle.
+static SDValue combineBVOfFpConversions(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+
+  // The result is a single conversion node, which is either signed or
+  // unsigned, so every element must use the same opcode as the first one.
+  unsigned ConvOpc = N->getOperand(0).getOpcode();
+  if (ConvOpc != ISD::SINT_TO_FP && ConvOpc != ISD::UINT_TO_FP)
+    return SDValue();
+
+  EVT FinalVecType = N->getValueType(0);
+  EVT IntermVecType = FinalVecType.changeVectorElementTypeToInteger();
+  EVT ScalarVT = IntermVecType.getScalarType();
+  unsigned OutputWidth = ScalarVT.getSizeInBits();
+  // Validate every operand up front so no node is created on a failed match.
+  for (SDValue Conv : N->op_values()) {
+    if (Conv.getOpcode() != ConvOpc)
+      return SDValue();
+    unsigned InputWidth = Conv.getOperand(0).getValueType().getSizeInBits();
+    if (InputWidth > OutputWidth)
+      return SDValue();
+  }
+
+  SDLoc dl(N);
+  unsigned ExtOpc =
+      ConvOpc == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+  SmallVector<SDValue, 4> Ops;
+  for (SDValue Conv : N->op_values()) {
+    SDValue InOp = Conv.getOperand(0);
+    if (InOp.getValueType().getSizeInBits() < OutputWidth)
+      InOp = DAG.getNode(ExtOpc, dl, ScalarVT, InOp);
+    Ops.push_back(InOp);
+  }
+  SDValue BV = DAG.getBuildVector(IntermVecType, dl, Ops);
+  return DAG.getNode(ConvOpc, dl, FinalVecType, BV);
+}
+
+/// Match (build_vector ([su]int_to_fp (extractelt V, I)),
+///                     ([su]int_to_fp (extractelt V, I + 1)))
+/// where V is a v4i32 and I is 0 or 2, and replace it with a
+/// PPCISD::[SU]INT_VEC_TO_FP node taking V and the index of the chosen pair.
+/// \returns the replacement node, or an empty SDValue if nothing was done.
+static SDValue combineBVOfExtractFpConvert(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+  if (N->getValueType(0) != MVT::v2f64)
+    return SDValue();
+
+  SDValue Conv0 = N->getOperand(0);
+  SDValue Conv1 = N->getOperand(1);
+  unsigned ConvOpc = Conv0.getOpcode();
+  if ((ConvOpc != ISD::SINT_TO_FP && ConvOpc != ISD::UINT_TO_FP) ||
+      Conv1.getOpcode() != ConvOpc)
+    return SDValue();
+
+  // Both converted integers must be constant-index extracts from one v4i32.
+  SDValue Ext0 = Conv0.getOperand(0);
+  SDValue Ext1 = Conv1.getOperand(0);
+  if (Ext0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+  auto *Idx0 = dyn_cast<ConstantSDNode>(Ext0.getOperand(1));
+  auto *Idx1 = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
+  SDValue SrcVec = Ext0.getOperand(0);
+  if (!Idx0 || !Idx1 || SrcVec.getValueType() != MVT::v4i32 ||
+      SrcVec != Ext1.getOperand(0))
+    return SDValue();
+
+  // The sub-vector index for each pair is swapped on little-endian targets.
+  const auto &Subtarget =
+      static_cast<const PPCSubtarget &>(DAG.getSubtarget());
+  int FirstElem = Idx0->getZExtValue();
+  int SecondElem = Idx1->getZExtValue();
+  int SubvecIdx;
+  if (FirstElem == 0 && SecondElem == 1)
+    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
+  else if (FirstElem == 2 && SecondElem == 3)
+    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
+  else
+    return SDValue();
+  SDLoc dl(N);
+  unsigned NodeType = ConvOpc == ISD::SINT_TO_FP ? PPCISD::SINT_VEC_TO_FP
+                                                 : PPCISD::UINT_VEC_TO_FP;
+  return DAG.getNode(NodeType, dl, MVT::v2f64, SrcVec,
+                     DAG.getIntPtrConstant(SubvecIdx, dl));
+}
+
 /// Reduce the number of loads when building a vector.
 ///
 /// Building a vector out of multiple loads can be converted to a load
@@ -12910,50 +13004,14 @@
       return Reduced;
   }
 
-
-  if (N->getValueType(0) != MVT::v2f64)
-    return SDValue();
-
-  // Looking for:
   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
-  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
-      FirstInput.getOpcode() != ISD::UINT_TO_FP)
-    return SDValue();
-  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
-      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
-    return SDValue();
-  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
-    return SDValue();
-
-  SDValue Ext1 = FirstInput.getOperand(0);
-  SDValue Ext2 = N->getOperand(1).getOperand(0);
-  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
-    return SDValue();
-
-  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
-  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
-  if (!Ext1Op || !Ext2Op)
-    return SDValue();
-  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
-      Ext1.getOperand(0) != Ext2.getOperand(0))
-    return SDValue();
-
-  int FirstElem = Ext1Op->getZExtValue();
-  int SecondElem = Ext2Op->getZExtValue();
-  int SubvecIdx;
-  if (FirstElem == 0 && SecondElem == 1)
-    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
-  else if (FirstElem == 2 && SecondElem == 3)
-    SubvecIdx = Subtarget.isLittleEndian() ? 
0 : 1; - else - return SDValue(); + Reduced = combineBVOfExtractFpConvert(N, DAG); + if (Reduced) + return Reduced; - SDValue SrcVec = Ext1.getOperand(0); - auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? - PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; - return DAG.getNode(NodeType, dl, MVT::v2f64, - SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); + // Combine (build_vector (sint_to_fp*)) to (sint_to_fp (build_vector)). + Reduced = combineBVOfFpConversions(N, DAG); + return Reduced; } SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, @@ -12977,18 +13035,24 @@ Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64)) return SDValue(); + // If we are loading a subword value and converting it to FP, we can avoid the + // move and load directly to a VSR. However, that is only profitable if there + // are no other uses of the loaded value. SDValue FirstOperand(Op.getOperand(0)); - bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && - (FirstOperand.getValueType() == MVT::i8 || - FirstOperand.getValueType() == MVT::i16); - if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) { + LoadSDNode *LdInput = dyn_cast(FirstOperand); + bool SubWordLoad = LdInput && + (LdInput->getMemoryVT() == MVT::i8 || + LdInput->getMemoryVT() == MVT::i16); + if (!DCI.isBeforeLegalize() && Subtarget.hasP9Vector() && + Subtarget.hasP9Altivec() && SubWordLoad && + FirstOperand.hasOneUse()) { bool Signed = N->getOpcode() == ISD::SINT_TO_FP; bool DstDouble = Op.getValueType() == MVT::f64; unsigned ConvOp = Signed ? (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) : (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS); SDValue WidthConst = - DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2, + DAG.getIntPtrConstant(LdInput->getMemoryVT() == MVT::i8 ? 
1 : 2, dl, false); LoadSDNode *LDN = cast(FirstOperand.getNode()); SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; diff --git a/test/CodeGen/PowerPC/int-to-fp-build-vectors.ll b/test/CodeGen/PowerPC/int-to-fp-build-vectors.ll --- a/test/CodeGen/PowerPC/int-to-fp-build-vectors.ll +++ b/test/CodeGen/PowerPC/int-to-fp-build-vectors.ll @@ -0,0 +1,517 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-PWR9 %s +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_sc_to_d(<16 x i8> %a) { +; CHECK-LABEL: test_sc_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: rldicl 3, 3, 8, 56 +; CHECK-NEXT: mfvsrd 4, 0 +; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: mtvsrd 0, 3 +; CHECK-NEXT: rldicl 4, 4, 8, 56 +; CHECK-NEXT: extsb 3, 4 +; CHECK-NEXT: mtvsrd 1, 3 +; CHECK-NEXT: xxmrghd 34, 0, 1 +; CHECK-NEXT: xvcvsxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_sc_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: vsldoi 2, 2, 2, 9 +; CHECK-PWR9-NEXT: vextsb2d 2, 2 +; CHECK-PWR9-NEXT: xvcvsxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 7 + %conv = sitofp i8 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 15 + %conv2 = sitofp i8 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_uc_to_d(<16 x i8> %a) { +; CHECK-LABEL: test_uc_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: clrldi 4, 3, 56 +; CHECK-NEXT: rldicl 3, 
3, 32, 56 +; CHECK-NEXT: clrldi 4, 4, 56 +; CHECK-NEXT: clrldi 3, 3, 56 +; CHECK-NEXT: mtvsrd 0, 4 +; CHECK-NEXT: mtvsrd 1, 3 +; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: xvcvuxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_uc_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: li 3, 8 +; CHECK-PWR9-NEXT: li 4, 12 +; CHECK-PWR9-NEXT: vextubrx 3, 3, 2 +; CHECK-PWR9-NEXT: vextubrx 4, 4, 2 +; CHECK-PWR9-NEXT: clrldi 3, 3, 56 +; CHECK-PWR9-NEXT: clrldi 4, 4, 56 +; CHECK-PWR9-NEXT: mtvsrdd 34, 4, 3 +; CHECK-PWR9-NEXT: xvcvuxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 8 + %conv = uitofp i8 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 12 + %conv2 = uitofp i8 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_sc_to_f(<16 x i8> %a) { +; CHECK-LABEL: test_sc_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: rldicl 3, 3, 40, 56 +; CHECK-NEXT: mfvsrd 4, 0 +; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: clrldi 5, 4, 56 +; CHECK-NEXT: rldicl 6, 4, 40, 56 +; CHECK-NEXT: rldicl 4, 4, 8, 56 +; CHECK-NEXT: extsb 6, 6 +; CHECK-NEXT: extsb 5, 5 +; CHECK-NEXT: extsb 4, 4 +; CHECK-NEXT: rldimi 5, 6, 32, 0 +; CHECK-NEXT: rldimi 4, 3, 32, 0 +; CHECK-NEXT: mtvsrd 0, 5 +; CHECK-NEXT: mtvsrd 1, 4 +; CHECK-NEXT: xxmrghd 0, 1, 0 +; CHECK-NEXT: xvcvsxwsp 34, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_sc_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; CHECK-PWR9-NEXT: addi 3, 3, .LCPI2_0@toc@l +; CHECK-PWR9-NEXT: lxvx 35, 0, 3 +; CHECK-PWR9-NEXT: vperm 2, 2, 2, 3 +; CHECK-PWR9-NEXT: vextsb2w 2, 2 +; CHECK-PWR9-NEXT: xvcvsxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, 
i32 0 + %conv = sitofp i8 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 3 + %conv2 = sitofp i8 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <16 x i8> %a, i32 7 + %conv5 = sitofp i8 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <16 x i8> %a, i32 11 + %conv8 = sitofp i8 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_uc_to_f(<16 x i8> %a) { +; CHECK-LABEL: test_uc_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: rldicl 3, 3, 40, 56 +; CHECK-NEXT: mfvsrd 4, 0 +; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-NEXT: clrldi 5, 4, 56 +; CHECK-NEXT: rldicl 6, 4, 40, 56 +; CHECK-NEXT: rldicl 4, 4, 8, 56 +; CHECK-NEXT: rlwinm 6, 6, 0, 24, 31 +; CHECK-NEXT: rlwinm 5, 5, 0, 24, 31 +; CHECK-NEXT: rlwinm 4, 4, 0, 24, 31 +; CHECK-NEXT: rldimi 5, 6, 32, 0 +; CHECK-NEXT: rldimi 4, 3, 32, 0 +; CHECK-NEXT: mtvsrd 0, 5 +; CHECK-NEXT: mtvsrd 1, 4 +; CHECK-NEXT: xxmrghd 0, 1, 0 +; CHECK-NEXT: xvcvuxwsp 34, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_uc_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: li 3, 0 +; CHECK-PWR9-NEXT: li 4, 3 +; CHECK-PWR9-NEXT: vextubrx 4, 4, 2 +; CHECK-PWR9-NEXT: vextubrx 3, 3, 2 +; CHECK-PWR9-NEXT: li 5, 7 +; CHECK-PWR9-NEXT: li 6, 11 +; CHECK-PWR9-NEXT: vextubrx 5, 5, 2 +; CHECK-PWR9-NEXT: vextubrx 6, 6, 2 +; CHECK-PWR9-NEXT: rlwinm 4, 4, 0, 24, 31 +; CHECK-PWR9-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-PWR9-NEXT: rldimi 3, 4, 32, 0 +; CHECK-PWR9-NEXT: rlwinm 4, 6, 0, 24, 31 +; CHECK-PWR9-NEXT: rlwinm 5, 5, 0, 24, 31 +; CHECK-PWR9-NEXT: rldimi 5, 4, 32, 0 +; CHECK-PWR9-NEXT: mtvsrdd 0, 5, 3 +; CHECK-PWR9-NEXT: xvcvuxwsp 34, 0 
+; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 0 + %conv = uitofp i8 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 3 + %conv2 = uitofp i8 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <16 x i8> %a, i32 7 + %conv5 = uitofp i8 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <16 x i8> %a, i32 11 + %conv8 = uitofp i8 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_ss_to_d(<8 x i16> %a) { +; CHECK-LABEL: test_ss_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: clrldi 3, 3, 48 +; CHECK-NEXT: mfvsrd 4, 0 +; CHECK-NEXT: extsh 3, 3 +; CHECK-NEXT: mtvsrd 0, 3 +; CHECK-NEXT: clrldi 4, 4, 48 +; CHECK-NEXT: extsh 3, 4 +; CHECK-NEXT: mtvsrd 1, 3 +; CHECK-NEXT: xxmrghd 34, 0, 1 +; CHECK-NEXT: xvcvsxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ss_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: vextsh2d 2, 2 +; CHECK-PWR9-NEXT: xvcvsxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = sitofp i16 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 4 + %conv2 = sitofp i16 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_us_to_d(<8 x i16> %a) { +; CHECK-LABEL: test_us_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: clrldi 3, 3, 48 +; CHECK-NEXT: mfvsrd 4, 0 +; CHECK-NEXT: clrldi 3, 3, 
48 +; CHECK-NEXT: mtvsrd 0, 3 +; CHECK-NEXT: clrldi 4, 4, 48 +; CHECK-NEXT: clrldi 3, 4, 48 +; CHECK-NEXT: mtvsrd 1, 3 +; CHECK-NEXT: xxmrghd 34, 0, 1 +; CHECK-NEXT: xvcvuxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_us_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: li 3, 0 +; CHECK-PWR9-NEXT: li 4, 8 +; CHECK-PWR9-NEXT: vextuhrx 3, 3, 2 +; CHECK-PWR9-NEXT: vextuhrx 4, 4, 2 +; CHECK-PWR9-NEXT: clrldi 3, 3, 48 +; CHECK-PWR9-NEXT: clrldi 4, 4, 48 +; CHECK-PWR9-NEXT: mtvsrdd 34, 4, 3 +; CHECK-PWR9-NEXT: xvcvuxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = uitofp i16 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 4 + %conv2 = uitofp i16 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_ss_to_f(<8 x i16> %a) { +; CHECK-LABEL: test_ss_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: clrldi 5, 3, 48 +; CHECK-NEXT: rldicl 3, 3, 32, 48 +; CHECK-NEXT: mfvsrd 4, 0 +; CHECK-NEXT: extsh 3, 3 +; CHECK-NEXT: extsh 5, 5 +; CHECK-NEXT: rldimi 5, 3, 32, 0 +; CHECK-NEXT: clrldi 6, 4, 48 +; CHECK-NEXT: rldicl 4, 4, 32, 48 +; CHECK-NEXT: mtvsrd 0, 5 +; CHECK-NEXT: extsh 3, 4 +; CHECK-NEXT: extsh 4, 6 +; CHECK-NEXT: rldimi 4, 3, 32, 0 +; CHECK-NEXT: mtvsrd 1, 4 +; CHECK-NEXT: xxmrghd 0, 0, 1 +; CHECK-NEXT: xvcvsxwsp 34, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ss_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: vextsh2w 2, 2 +; CHECK-PWR9-NEXT: xvcvsxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = sitofp i16 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 2 + %conv2 = sitofp 
i16 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <8 x i16> %a, i32 4 + %conv5 = sitofp i16 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <8 x i16> %a, i32 6 + %conv8 = sitofp i16 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_us_to_f(<8 x i16> %a) { +; CHECK-LABEL: test_us_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: clrldi 5, 3, 48 +; CHECK-NEXT: rldicl 3, 3, 32, 48 +; CHECK-NEXT: mfvsrd 4, 0 +; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-NEXT: rlwinm 5, 5, 0, 16, 31 +; CHECK-NEXT: rldimi 5, 3, 32, 0 +; CHECK-NEXT: clrldi 6, 4, 48 +; CHECK-NEXT: rldicl 4, 4, 32, 48 +; CHECK-NEXT: mtvsrd 0, 5 +; CHECK-NEXT: rlwinm 3, 4, 0, 16, 31 +; CHECK-NEXT: rlwinm 4, 6, 0, 16, 31 +; CHECK-NEXT: rldimi 4, 3, 32, 0 +; CHECK-NEXT: mtvsrd 1, 4 +; CHECK-NEXT: xxmrghd 0, 0, 1 +; CHECK-NEXT: xvcvuxwsp 34, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_us_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: li 3, 0 +; CHECK-PWR9-NEXT: li 4, 4 +; CHECK-PWR9-NEXT: li 5, 8 +; CHECK-PWR9-NEXT: vextuhrx 3, 3, 2 +; CHECK-PWR9-NEXT: vextuhrx 4, 4, 2 +; CHECK-PWR9-NEXT: li 6, 12 +; CHECK-PWR9-NEXT: vextuhrx 5, 5, 2 +; CHECK-PWR9-NEXT: vextuhrx 6, 6, 2 +; CHECK-PWR9-NEXT: rlwinm 4, 4, 0, 16, 31 +; CHECK-PWR9-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-PWR9-NEXT: rldimi 3, 4, 32, 0 +; CHECK-PWR9-NEXT: rlwinm 4, 6, 0, 16, 31 +; CHECK-PWR9-NEXT: rlwinm 5, 5, 0, 16, 31 +; CHECK-PWR9-NEXT: rldimi 5, 4, 32, 0 +; CHECK-PWR9-NEXT: mtvsrdd 0, 5, 3 +; CHECK-PWR9-NEXT: xvcvuxwsp 34, 0 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = uitofp i16 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, 
i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 2 + %conv2 = uitofp i16 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <8 x i16> %a, i32 4 + %conv5 = uitofp i16 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <8 x i16> %a, i32 6 + %conv8 = uitofp i16 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_si_to_d(<4 x i32> %a) { +; CHECK-LABEL: test_si_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrwz 4, 34 +; CHECK-NEXT: extsw 4, 4 +; CHECK-NEXT: mfvsrwz 3, 0 +; CHECK-NEXT: mtvsrd 1, 4 +; CHECK-NEXT: extsw 3, 3 +; CHECK-NEXT: mtvsrd 0, 3 +; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: xvcvsxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_si_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: vextsw2d 2, 2 +; CHECK-PWR9-NEXT: xvcvsxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = sitofp i32 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 2 + %conv2 = sitofp i32 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_ui_to_d(<4 x i32> %a) { +; CHECK-LABEL: test_ui_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: xxlxor 1, 1, 1 +; CHECK-NEXT: mfvsrwz 3, 0 +; CHECK-NEXT: mtfprwz 0, 3 +; CHECK-NEXT: xscvuxddp 0, 0 +; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ui_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: xxextractuw 0, 34, 12 +; CHECK-PWR9-NEXT: xscvuxddp 0, 0 +; CHECK-PWR9-NEXT: xxlxor 1, 
1, 1 +; CHECK-PWR9-NEXT: xxmrghd 34, 1, 0 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = uitofp i32 %vecext to double + %vecinit3 = insertelement <2 x double> , double %conv, i32 0 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_si_to_f(<4 x i32> %a) { +; CHECK-LABEL: test_si_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI10_0@toc@l +; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: xvcvsxwsp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_si_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; CHECK-PWR9-NEXT: addi 3, 3, .LCPI10_0@toc@l +; CHECK-PWR9-NEXT: lxvx 35, 0, 3 +; CHECK-PWR9-NEXT: vperm 2, 2, 2, 3 +; CHECK-PWR9-NEXT: xvcvsxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = sitofp i32 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 3 + %conv2 = sitofp i32 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <4 x i32> %a, i32 1 + %conv5 = sitofp i32 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <4 x i32> %a, i32 2 + %conv8 = sitofp i32 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_ui_to_f(<4 x i32> %a) { +; CHECK-LABEL: test_ui_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvcvuxwsp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ui_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: xvcvuxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %0 = uitofp <4 x i32> %a to <4 x float> + ret <4 x float> %0 +} + +; Function 
Attrs: norecurse nounwind readnone +define <2 x double> @test_sl_to_d(<2 x i64> %a) { +; CHECK-LABEL: test_sl_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvcvsxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_sl_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: xvcvsxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %0 = sitofp <2 x i64> %a to <2 x double> + ret <2 x double> %0 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_ul_to_d(<2 x i64> %a) { +; CHECK-LABEL: test_ul_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltd 34, 34, 1 +; CHECK-NEXT: xvcvuxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ul_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: xxspltd 34, 34, 1 +; CHECK-PWR9-NEXT: xvcvuxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <2 x i64> %a, i32 0 + %conv = uitofp i64 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecinit3 = shufflevector <2 x double> %vecinit, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @test_2ul_to_d(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_2ul_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxpermdi 34, 35, 34, 1 +; CHECK-NEXT: xvcvsxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_2ul_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: xxpermdi 34, 35, 34, 1 +; CHECK-PWR9-NEXT: xvcvsxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <2 x i64> %a, i32 0 + %conv = sitofp i64 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 1 + %conv2 = sitofp i64 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} diff --git a/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll b/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll --- 
a/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll +++ b/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll @@ -14,61 +14,42 @@ define dso_local <2 x double> @test1(<8 x i16> %a) { ; P9BE-LABEL: test1: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: xscvuxddp f0, f0 -; P9BE-NEXT: xscvuxddp f1, f1 -; P9BE-NEXT: xxmrghd v2, vs0, vs1 +; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; P9BE-NEXT: lxvx v3, 0, r3 +; P9BE-NEXT: xxlxor v4, v4, v4 +; P9BE-NEXT: vperm v2, v4, v2, v3 +; P9BE-NEXT: xvcvuxddp v2, v2 ; P9BE-NEXT: blr ; ; P9LE-LABEL: test1: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r3, r3, 0, 16, 31 -; P9LE-NEXT: mtfprwz f0, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r3, r3, 0, 16, 31 -; P9LE-NEXT: mtfprwz f1, r3 -; P9LE-NEXT: xscvuxddp f0, f0 -; P9LE-NEXT: xscvuxddp f1, f1 -; P9LE-NEXT: xxmrghd v2, vs1, vs0 +; P9LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; P9LE-NEXT: lxvx v3, 0, r3 +; P9LE-NEXT: xxlxor v4, v4, v4 +; P9LE-NEXT: vperm v2, v2, v4, v3 +; P9LE-NEXT: xvcvuxddp v2, v2 ; P9LE-NEXT: blr ; ; P8BE-LABEL: test1: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: mfvsrd r3, v2 -; P8BE-NEXT: rldicl r4, r3, 16, 48 -; P8BE-NEXT: rldicl r3, r3, 32, 48 -; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31 -; P8BE-NEXT: rlwinm r3, r3, 0, 16, 31 -; P8BE-NEXT: mtfprwz f0, r4 -; P8BE-NEXT: mtfprwz f1, r3 -; P8BE-NEXT: xscvuxddp f0, f0 -; P8BE-NEXT: xscvuxddp f1, f1 -; P8BE-NEXT: xxmrghd v2, vs0, vs1 +; P8BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; P8BE-NEXT: xxlxor v4, v4, v4 +; P8BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; P8BE-NEXT: lxvw4x v3, 0, r3 +; P8BE-NEXT: vperm v2, v4, v2, v3 +; P8BE-NEXT: 
xvcvuxddp v2, v2 ; P8BE-NEXT: blr ; ; P8LE-LABEL: test1: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: mfvsrd r3, f0 -; P8LE-NEXT: clrldi r4, r3, 48 -; P8LE-NEXT: rldicl r3, r3, 48, 48 -; P8LE-NEXT: rlwinm r4, r4, 0, 16, 31 -; P8LE-NEXT: rlwinm r3, r3, 0, 16, 31 -; P8LE-NEXT: mtfprwz f0, r4 -; P8LE-NEXT: mtfprwz f1, r3 -; P8LE-NEXT: xscvuxddp f0, f0 -; P8LE-NEXT: xscvuxddp f1, f1 -; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; P8LE-NEXT: xxlxor v4, v4, v4 +; P8LE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; P8LE-NEXT: lvx v3, 0, r3 +; P8LE-NEXT: vperm v2, v2, v4, v3 +; P8LE-NEXT: xvcvuxddp v2, v2 ; P8LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %a, i32 0 @@ -83,32 +64,34 @@ define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) { ; P9BE-LABEL: test2: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: xxextractuw f0, v2, 0 -; P9BE-NEXT: xxextractuw f1, v3, 4 -; P9BE-NEXT: xscvuxddp f0, f0 -; P9BE-NEXT: xscvuxddp f1, f1 -; P9BE-NEXT: xxmrghd v2, vs0, vs1 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuwlx r3, r3, v2 +; P9BE-NEXT: mfvsrwz r4, v3 +; P9BE-NEXT: mtvsrdd v2, r3, r4 +; P9BE-NEXT: xvcvuxddp v2, v2 ; P9BE-NEXT: blr ; ; P9LE-LABEL: test2: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: xxextractuw f0, v2, 12 -; P9LE-NEXT: xxextractuw f1, v3, 8 -; P9LE-NEXT: xscvuxddp f0, f0 -; P9LE-NEXT: xscvuxddp f1, f1 -; P9LE-NEXT: xxmrghd v2, vs1, vs0 +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: li r4, 4 +; P9LE-NEXT: vextuwrx r3, r3, v2 +; P9LE-NEXT: vextuwrx r4, r4, v3 +; P9LE-NEXT: mtvsrdd v2, r4, r3 +; P9LE-NEXT: xvcvuxddp v2, v2 ; P9LE-NEXT: blr ; ; P8BE-LABEL: test2: ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: xxsldwi vs0, v2, v2, 3 -; P8BE-NEXT: mfvsrwz r4, v3 -; P8BE-NEXT: mtfprwz f1, r4 -; P8BE-NEXT: mfvsrwz r3, f0 -; P8BE-NEXT: xscvuxddp f1, f1 -; P8BE-NEXT: mtfprwz f0, r3 -; P8BE-NEXT: xscvuxddp f0, f0 -; P8BE-NEXT: xxmrghd v2, vs0, vs1 +; P8BE-NEXT: mfvsrwz r3, v3 +; P8BE-NEXT: clrldi r3, r3, 32 +; P8BE-NEXT: mfvsrwz r4, f0 +; 
P8BE-NEXT: mtvsrd f0, r3 +; P8BE-NEXT: clrldi r3, r4, 32 +; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: xxmrghd v2, vs1, vs0 +; P8BE-NEXT: xvcvuxddp v2, v2 ; P8BE-NEXT: blr ; ; P8LE-LABEL: test2: @@ -117,11 +100,12 @@ ; P8LE-NEXT: xxsldwi vs1, v3, v3, 1 ; P8LE-NEXT: mfvsrwz r3, f0 ; P8LE-NEXT: mfvsrwz r4, f1 -; P8LE-NEXT: mtfprwz f0, r3 -; P8LE-NEXT: mtfprwz f1, r4 -; P8LE-NEXT: xscvuxddp f0, f0 -; P8LE-NEXT: xscvuxddp f1, f1 +; P8LE-NEXT: clrldi r3, r3, 32 +; P8LE-NEXT: clrldi r4, r4, 32 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtvsrd f1, r4 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: xvcvuxddp v2, v2 ; P8LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 0 diff --git a/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll b/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll --- a/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll +++ b/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll @@ -137,14 +137,12 @@ ret <4 x float> %splat.splat ; CHECK-LABEL: vecfuc ; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3 -; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]] -; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] -; CHECK-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-NEXT: xxspltw [[SPLT:[0-9]+]], [[LD]], 1 +; CHECK-NEXT: xvcvuxwsp 34, [[SPLT]] ; CHECK-BE-LABEL: vecfuc ; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3 -; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]] -; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] -; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-BE-NEXT: xxspltw [[SPLT:[0-9]+]], [[LD]], 1 +; CHECK-BE-NEXT: xvcvuxwsp 34, [[SPLT]] } ; Function Attrs: norecurse nounwind readonly @@ -157,12 +155,12 @@ ret <2 x double> %splat.splat ; CHECK-LABEL: vecduc ; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3 -; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]] -; CHECK-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-NEXT: xxspltd [[SPLT:[0-9]+]], [[LD]], 0 +; CHECK-NEXT: xvcvuxddp 34, [[SPLT]] ; CHECK-BE-LABEL: vecduc ; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3 -; 
CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]] -; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-BE-NEXT: xxspltd [[SPLT:[0-9]+]], [[LD]], 0 +; CHECK-BE-NEXT: xvcvuxddp 34, [[SPLT]] } ; Function Attrs: norecurse nounwind readonly @@ -277,16 +275,14 @@ ret <4 x float> %splat.splat ; CHECK-LABEL: vecfsc ; CHECK: lxsibzx -; CHECK-NEXT: vextsb2d -; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]], -; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] -; CHECK-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-NEXT: vextsb2w +; CHECK-NEXT: xxspltw [[SPLT:[0-9]+]], +; CHECK-NEXT: xvcvsxwsp 34, [[SPLT]] ; CHECK-BE-LABEL: vecfsc ; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3 -; CHECK-BE-NEXT: vextsb2d -; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]], -; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] -; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-BE-NEXT: vextsb2w +; CHECK-BE-NEXT: xxspltw [[SPLT:[0-9]+]], +; CHECK-BE-NEXT: xvcvsxwsp 34, [[SPLT]] } ; Function Attrs: norecurse nounwind readonly @@ -300,13 +296,13 @@ ; CHECK-LABEL: vecdsc ; CHECK: lxsibzx ; CHECK-NEXT: vextsb2d -; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]], -; CHECK-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-NEXT: xxspltd [[SPLT:[0-9]+]], +; CHECK-NEXT: xvcvsxddp 34, [[SPLT]] ; CHECK-BE-LABEL: vecdsc ; CHECK-BE: lxsibzx ; CHECK-BE-NEXT: vextsb2d -; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]], -; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-BE-NEXT: xxspltd [[SPLT:[0-9]+]], +; CHECK-BE-NEXT: xvcvsxddp 34, [[SPLT]] } ; Function Attrs: norecurse nounwind readonly @@ -447,14 +443,12 @@ ret <4 x float> %splat.splat ; CHECK-LABEL: vecfus ; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3 -; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]] -; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] -; CHECK-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-NEXT: xxspltw [[SPLT:[0-9]+]], [[LD]], 1 +; CHECK-NEXT: xvcvuxwsp 34, [[SPLT]] ; CHECK-BE-LABEL: vecfus ; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3 -; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]] -; CHECK-BE-NEXT: 
xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] -; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-BE-NEXT: xxspltw [[SPLT:[0-9]+]], [[LD]], 1 +; CHECK-BE-NEXT: xvcvuxwsp 34, [[SPLT]] } ; Function Attrs: norecurse nounwind readonly @@ -467,12 +461,12 @@ ret <2 x double> %splat.splat ; CHECK-LABEL: vecdus ; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3 -; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]] -; CHECK-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-NEXT: xxspltd [[SPLT:[0-9]+]], [[LD]], 0 +; CHECK-NEXT: xvcvuxddp 34, [[SPLT]] ; CHECK-BE-LABEL: vecdus ; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3 -; CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]] -; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-BE-NEXT: xxspltd [[SPLT:[0-9]+]], [[LD]], 0 +; CHECK-BE-NEXT: xvcvuxddp 34, [[SPLT]] } ; Function Attrs: norecurse nounwind readonly @@ -591,16 +585,14 @@ ret <4 x float> %splat.splat ; CHECK-LABEL: vecfss ; CHECK: lxsihzx -; CHECK-NEXT: vextsh2d -; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]], -; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] -; CHECK-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-NEXT: vextsh2w +; CHECK-NEXT: xxspltw [[CONVS:[0-9]+]], {{[0-9]+}}, 1 +; CHECK-NEXT: xvcvsxwsp 34, [[CONVS]] ; CHECK-BE-LABEL: vecfss ; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3 -; CHECK-BE-NEXT: vextsh2d -; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]], -; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]] -; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0 +; CHECK-BE-NEXT: vextsh2w +; CHECK-BE-NEXT: xxspltw [[CONVS:[0-9]+]], {{[0-9]+}}, 1 +; CHECK-BE-NEXT: xvcvsxwsp 34, [[CONVS]] } ; Function Attrs: norecurse nounwind readonly @@ -614,13 +606,13 @@ ; CHECK-LABEL: vecdss ; CHECK: lxsihzx ; CHECK-NEXT: vextsh2d -; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]], -; CHECK-NEXT: xxspltd 34, [[CONVD]], 0 +; CHECK-NEXT: xxspltd [[SPLT:[0-9]+]], +; CHECK-NEXT: xvcvsxddp 34, [[SPLT]] ; CHECK-BE-LABEL: vecdss ; CHECK-BE: lxsihzx ; CHECK-BE-NEXT: vextsh2d -; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]], -; CHECK-BE-NEXT: xxspltd 34, [[CONVD]], 0 
+; CHECK-BE-NEXT: xxspltd [[SPLT:[0-9]+]], +; CHECK-BE-NEXT: xvcvsxddp 34, [[SPLT]] } ; Function Attrs: norecurse nounwind