Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11695,6 +11695,68 @@
   return SDValue();
 }
 
+/// Fold a BUILD_VECTOR of scalar int-to-fp conversions into one vector
+/// conversion of a BUILD_VECTOR of the integer inputs:
+///   (build_vector (sint_to_fp a), (sint_to_fp b), ...)
+///     -> (sint_to_fp (build_vector a, b, ...))
+/// and the same for uint_to_fp.
+///
+/// Inputs narrower than the integer element type that matches the result's
+/// FP element width are sign- or zero-extended (matching the conversion)
+/// before being placed in the integer vector.
+///
+/// \param N   The BUILD_VECTOR node to combine.
+/// \param DAG The DAG in which the replacement nodes are created.
+/// \returns The vector conversion node, or an empty SDValue if the operands
+/// are not all the same kind of int-to-fp conversion or if any input is
+/// wider than the result's element.
+static SDValue combineBVOfFpConversions(SDNode *N,
+                                        SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+
+  // Every operand must be the *same* conversion. Operand 0's opcode decides
+  // both the extension kind and the final vector conversion, so a mix of
+  // sint_to_fp and uint_to_fp cannot be folded into a single node.
+  const unsigned ConvOpcode = N->getOperand(0).getOpcode();
+  if (ConvOpcode != ISD::SINT_TO_FP && ConvOpcode != ISD::UINT_TO_FP)
+    return SDValue();
+
+  EVT FinalVecType = N->getValueType(0);
+  EVT IntermVecType = FinalVecType.changeVectorElementTypeToInteger();
+  EVT IntermEltType = IntermVecType.getScalarType();
+  const unsigned EltWidth = IntermEltType.getSizeInBits();
+  const unsigned NumElts = N->getNumOperands();
+
+  // Validate all operands up front so that no extension nodes are created
+  // when the combine has to be abandoned.
+  for (unsigned i = 0; i != NumElts; ++i) {
+    SDValue Conv = N->getOperand(i);
+    if (Conv.getOpcode() != ConvOpcode)
+      return SDValue();
+    // An input wider than the element cannot be placed in the integer
+    // vector without truncating it, so give up.
+    if (Conv.getOperand(0).getValueType().getSizeInBits() > EltWidth)
+      return SDValue();
+  }
+
+  SDLoc dl(N);
+  const unsigned ExtOpcode =
+      ConvOpcode == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+  SmallVector<SDValue, 4> Ops;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    SDValue InOp = N->getOperand(i).getOperand(0);
+    // Widen narrow inputs to the integer element type; the validation loop
+    // above guarantees the input is never wider than the element.
+    if (InOp.getValueType().getSizeInBits() < EltWidth)
+      InOp = DAG.getNode(ExtOpcode, dl, IntermEltType, InOp);
+    Ops.push_back(InOp);
+  }
+
+  SDValue BV = DAG.getBuildVector(IntermVecType, dl, Ops);
+  return DAG.getNode(ConvOpcode, dl, FinalVecType, BV);
+}
+
 /// \brief Reduce the number of loads when building a vector.
 ///
 /// Building a vector out of multiple loads can be converted to a load
@@ -11941,6 +12003,10 @@
     Reduced = combineBVOfVecSExt(N, DAG);
     if (Reduced)
       return Reduced;
+    // Combine (build_vector (sint_to_fp*)) to (sint_to_fp (build_vector)).
+ Reduced = combineBVOfFpConversions(N, DAG); + if (Reduced) + return Reduced; } Index: test/CodeGen/PowerPC/int-to-fp-build-vectors.ll =================================================================== --- test/CodeGen/PowerPC/int-to-fp-build-vectors.ll +++ test/CodeGen/PowerPC/int-to-fp-build-vectors.ll @@ -0,0 +1,503 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-PWR9 %s +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_sc_to_d(<16 x i8> %a) { +; CHECK-LABEL: test_sc_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 4, 34 +; CHECK-NEXT: rldicl 4, 4, 8, 56 +; CHECK-NEXT: mfvsrd 3, 0 +; CHECK-NEXT: extsb 12, 4 +; CHECK-NEXT: mtvsrwa 1, 12 +; CHECK-NEXT: rldicl 3, 3, 8, 56 +; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: xscvsxddp 1, 1 +; CHECK-NEXT: mtvsrwa 0, 3 +; CHECK-NEXT: xscvsxddp 0, 0 +; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_sc_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: vsldoi 2, 2, 2, 9 +; CHECK-PWR9-NEXT: vextsb2d 2, 2 +; CHECK-PWR9-NEXT: xvcvsxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 7 + %conv = sitofp i8 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 15 + %conv2 = sitofp i8 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_uc_to_d(<16 x i8> %a) { +; CHECK-LABEL: test_uc_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: clrldi 4, 3, 56 +; CHECK-NEXT: rldicl 3, 3, 32, 56 +; 
CHECK-NEXT: rlwinm 4, 4, 0, 24, 31 +; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-NEXT: mtvsrwz 0, 4 +; CHECK-NEXT: mtvsrwz 1, 3 +; CHECK-NEXT: xscvuxddp 0, 0 +; CHECK-NEXT: xscvuxddp 1, 1 +; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_uc_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: li 3, 8 +; CHECK-PWR9-NEXT: li 4, 12 +; CHECK-PWR9-NEXT: vextubrx 3, 3, 2 +; CHECK-PWR9-NEXT: vextubrx 4, 4, 2 +; CHECK-PWR9-NEXT: clrldi 3, 3, 56 +; CHECK-PWR9-NEXT: clrldi 4, 4, 56 +; CHECK-PWR9-NEXT: mtvsrdd 51, 4, 3 +; CHECK-PWR9-NEXT: xvcvuxddp 34, 51 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 8 + %conv = uitofp i8 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 12 + %conv2 = uitofp i8 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_sc_to_f(<16 x i8> %a) { +; CHECK-LABEL: test_sc_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: rldicl 3, 3, 40, 56 +; CHECK-NEXT: mfvsrd 4, 0 +; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: mtvsrwz 0, 3 +; CHECK-NEXT: clrldi 5, 4, 56 +; CHECK-NEXT: rldicl 3, 4, 8, 56 +; CHECK-NEXT: rldicl 4, 4, 40, 56 +; CHECK-NEXT: extsb 5, 5 +; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: extsb 4, 4 +; CHECK-NEXT: mtvsrwz 3, 4 +; CHECK-NEXT: mtvsrwz 1, 5 +; CHECK-NEXT: mtvsrwz 2, 3 +; CHECK-NEXT: xxmrghd 35, 0, 3 +; CHECK-NEXT: xxmrghd 51, 2, 1 +; CHECK-NEXT: vmrgow 2, 3, 19 +; CHECK-NEXT: xvcvsxwsp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_sc_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; CHECK-PWR9-NEXT: addi 3, 3, .LCPI2_0@toc@l +; CHECK-PWR9-NEXT: lxvx 35, 0, 3 +; CHECK-PWR9-NEXT: vperm 2, 2, 2, 3 +; CHECK-PWR9-NEXT: vextsb2w 2, 2 +; CHECK-PWR9-NEXT: 
xvcvsxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 0 + %conv = sitofp i8 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 3 + %conv2 = sitofp i8 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <16 x i8> %a, i32 7 + %conv5 = sitofp i8 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <16 x i8> %a, i32 11 + %conv8 = sitofp i8 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_uc_to_f(<16 x i8> %a) { +; CHECK-LABEL: test_uc_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: rldicl 3, 3, 40, 56 +; CHECK-NEXT: mfvsrd 4, 0 +; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-NEXT: mtvsrwz 0, 3 +; CHECK-NEXT: clrldi 5, 4, 56 +; CHECK-NEXT: rldicl 3, 4, 8, 56 +; CHECK-NEXT: rldicl 4, 4, 40, 56 +; CHECK-NEXT: rlwinm 5, 5, 0, 24, 31 +; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-NEXT: rlwinm 4, 4, 0, 24, 31 +; CHECK-NEXT: mtvsrwz 3, 4 +; CHECK-NEXT: mtvsrwz 1, 5 +; CHECK-NEXT: mtvsrwz 2, 3 +; CHECK-NEXT: xxmrghd 35, 0, 3 +; CHECK-NEXT: xxmrghd 51, 2, 1 +; CHECK-NEXT: vmrgow 2, 3, 19 +; CHECK-NEXT: xvcvuxwsp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_uc_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: li 3, 0 +; CHECK-PWR9-NEXT: li 4, 3 +; CHECK-PWR9-NEXT: li 5, 7 +; CHECK-PWR9-NEXT: li 6, 11 +; CHECK-PWR9-NEXT: vextubrx 3, 3, 2 +; CHECK-PWR9-NEXT: vextubrx 4, 4, 2 +; CHECK-PWR9-NEXT: vextubrx 5, 5, 2 +; CHECK-PWR9-NEXT: vextubrx 6, 6, 2 +; CHECK-PWR9-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-PWR9-NEXT: rlwinm 5, 5, 0, 24, 31 +; CHECK-PWR9-NEXT: rlwinm 4, 4, 0, 24, 31 +; CHECK-PWR9-NEXT: rlwinm 6, 6, 0, 24, 31 +; 
CHECK-PWR9-NEXT: mtvsrdd 51, 5, 3 +; CHECK-PWR9-NEXT: mtvsrdd 35, 6, 4 +; CHECK-PWR9-NEXT: vmrgow 2, 3, 19 +; CHECK-PWR9-NEXT: xvcvuxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 0 + %conv = uitofp i8 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 3 + %conv2 = uitofp i8 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <16 x i8> %a, i32 7 + %conv5 = uitofp i8 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <16 x i8> %a, i32 11 + %conv8 = uitofp i8 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_ss_to_d(<8 x i16> %a) { +; CHECK-LABEL: test_ss_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 4, 34 +; CHECK-NEXT: clrldi 4, 4, 48 +; CHECK-NEXT: mfvsrd 3, 0 +; CHECK-NEXT: extsh 12, 4 +; CHECK-NEXT: mtvsrwa 1, 12 +; CHECK-NEXT: clrldi 3, 3, 48 +; CHECK-NEXT: extsh 3, 3 +; CHECK-NEXT: xscvsxddp 1, 1 +; CHECK-NEXT: mtvsrwa 0, 3 +; CHECK-NEXT: xscvsxddp 0, 0 +; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ss_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: vextsh2d 2, 2 +; CHECK-PWR9-NEXT: xvcvsxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = sitofp i16 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 4 + %conv2 = sitofp i16 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_us_to_d(<8 x i16> %a) { +; CHECK-LABEL: 
test_us_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 4, 34 +; CHECK-NEXT: clrldi 4, 4, 48 +; CHECK-NEXT: mfvsrd 3, 0 +; CHECK-NEXT: rlwinm 12, 4, 0, 16, 31 +; CHECK-NEXT: mtvsrwz 1, 12 +; CHECK-NEXT: clrldi 3, 3, 48 +; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-NEXT: xscvuxddp 1, 1 +; CHECK-NEXT: mtvsrwz 0, 3 +; CHECK-NEXT: xscvuxddp 0, 0 +; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_us_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: li 3, 0 +; CHECK-PWR9-NEXT: li 4, 8 +; CHECK-PWR9-NEXT: vextuhrx 3, 3, 2 +; CHECK-PWR9-NEXT: vextuhrx 4, 4, 2 +; CHECK-PWR9-NEXT: clrldi 3, 3, 48 +; CHECK-PWR9-NEXT: clrldi 4, 4, 48 +; CHECK-PWR9-NEXT: mtvsrdd 51, 4, 3 +; CHECK-PWR9-NEXT: xvcvuxddp 34, 51 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = uitofp i16 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 4 + %conv2 = uitofp i16 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_ss_to_f(<8 x i16> %a) { +; CHECK-LABEL: test_ss_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: clrldi 4, 3, 48 +; CHECK-NEXT: rldicl 3, 3, 32, 48 +; CHECK-NEXT: mfvsrd 5, 0 +; CHECK-NEXT: extsh 4, 4 +; CHECK-NEXT: extsh 3, 3 +; CHECK-NEXT: mtvsrwz 0, 4 +; CHECK-NEXT: clrldi 4, 5, 48 +; CHECK-NEXT: rldicl 12, 5, 32, 48 +; CHECK-NEXT: mtvsrwz 1, 3 +; CHECK-NEXT: extsh 4, 4 +; CHECK-NEXT: extsh 3, 12 +; CHECK-NEXT: mtvsrwz 2, 4 +; CHECK-NEXT: mtvsrwz 3, 3 +; CHECK-NEXT: xxmrghd 51, 0, 2 +; CHECK-NEXT: xxmrghd 35, 1, 3 +; CHECK-NEXT: vmrgow 2, 3, 19 +; CHECK-NEXT: xvcvsxwsp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ss_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: vextsh2w 2, 2 
+; CHECK-PWR9-NEXT: xvcvsxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = sitofp i16 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 2 + %conv2 = sitofp i16 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <8 x i16> %a, i32 4 + %conv5 = sitofp i16 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <8 x i16> %a, i32 6 + %conv8 = sitofp i16 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_us_to_f(<8 x i16> %a) { +; CHECK-LABEL: test_us_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: mfvsrd 3, 34 +; CHECK-NEXT: clrldi 4, 3, 48 +; CHECK-NEXT: rldicl 3, 3, 32, 48 +; CHECK-NEXT: mfvsrd 5, 0 +; CHECK-NEXT: rlwinm 4, 4, 0, 16, 31 +; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-NEXT: mtvsrwz 0, 4 +; CHECK-NEXT: clrldi 4, 5, 48 +; CHECK-NEXT: rldicl 12, 5, 32, 48 +; CHECK-NEXT: mtvsrwz 1, 3 +; CHECK-NEXT: rlwinm 4, 4, 0, 16, 31 +; CHECK-NEXT: rlwinm 3, 12, 0, 16, 31 +; CHECK-NEXT: mtvsrwz 2, 4 +; CHECK-NEXT: mtvsrwz 3, 3 +; CHECK-NEXT: xxmrghd 51, 0, 2 +; CHECK-NEXT: xxmrghd 35, 1, 3 +; CHECK-NEXT: vmrgow 2, 3, 19 +; CHECK-NEXT: xvcvuxwsp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_us_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: li 3, 0 +; CHECK-PWR9-NEXT: li 4, 4 +; CHECK-PWR9-NEXT: li 5, 8 +; CHECK-PWR9-NEXT: li 6, 12 +; CHECK-PWR9-NEXT: vextuhrx 3, 3, 2 +; CHECK-PWR9-NEXT: vextuhrx 4, 4, 2 +; CHECK-PWR9-NEXT: vextuhrx 5, 5, 2 +; CHECK-PWR9-NEXT: vextuhrx 6, 6, 2 +; CHECK-PWR9-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-PWR9-NEXT: rlwinm 5, 5, 0, 16, 31 +; CHECK-PWR9-NEXT: rlwinm 4, 4, 0, 16, 31 +; CHECK-PWR9-NEXT: rlwinm 
6, 6, 0, 16, 31 +; CHECK-PWR9-NEXT: mtvsrdd 51, 5, 3 +; CHECK-PWR9-NEXT: mtvsrdd 35, 6, 4 +; CHECK-PWR9-NEXT: vmrgow 2, 3, 19 +; CHECK-PWR9-NEXT: xvcvuxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = uitofp i16 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 2 + %conv2 = uitofp i16 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <8 x i16> %a, i32 4 + %conv5 = uitofp i16 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <8 x i16> %a, i32 6 + %conv8 = uitofp i16 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_si_to_d(<4 x i32> %a) { +; CHECK-LABEL: test_si_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltw 0, 34, 3 +; CHECK-NEXT: xxspltw 1, 34, 1 +; CHECK-NEXT: xvcvsxwdp 0, 0 +; CHECK-NEXT: xvcvsxwdp 1, 1 +; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_si_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: vextsw2d 2, 2 +; CHECK-PWR9-NEXT: xvcvsxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = sitofp i32 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 2 + %conv2 = sitofp i32 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_ui_to_d(<4 x i32> %a) { +; CHECK-LABEL: test_ui_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: xxlxor 1, 1, 1 +; CHECK-NEXT: mfvsrwz 3, 0 +; CHECK-NEXT: mtvsrwz 0, 3 +; CHECK-NEXT: 
xscvuxddp 0, 0 +; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ui_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: xxextractuw 0, 34, 12 +; CHECK-PWR9-NEXT: xxlxor 1, 1, 1 +; CHECK-PWR9-NEXT: xscvuxddp 0, 0 +; CHECK-PWR9-NEXT: xxmrghd 34, 1, 0 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = uitofp i32 %vecext to double + %vecinit3 = insertelement <2 x double> , double %conv, i32 0 + ret <2 x double> %vecinit3 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_si_to_f(<4 x i32> %a) { +; CHECK-LABEL: test_si_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI10_0@toc@l +; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: xvcvsxwsp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_si_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; CHECK-PWR9-NEXT: addi 3, 3, .LCPI10_0@toc@l +; CHECK-PWR9-NEXT: lxvx 35, 0, 3 +; CHECK-PWR9-NEXT: vperm 2, 2, 2, 3 +; CHECK-PWR9-NEXT: xvcvsxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = sitofp i32 %vecext to float + %vecinit = insertelement <4 x float> undef, float %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 3 + %conv2 = sitofp i32 %vecext1 to float + %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1 + %vecext4 = extractelement <4 x i32> %a, i32 1 + %conv5 = sitofp i32 %vecext4 to float + %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2 + %vecext7 = extractelement <4 x i32> %a, i32 2 + %conv8 = sitofp i32 %vecext7 to float + %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3 + ret <4 x float> %vecinit9 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x float> @test_ui_to_f(<4 x i32> %a) { +; CHECK-LABEL: test_ui_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvcvuxwsp 
34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ui_to_f: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: xvcvuxwsp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %0 = uitofp <4 x i32> %a to <4 x float> + ret <4 x float> %0 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_sl_to_d(<2 x i64> %a) { +; CHECK-LABEL: test_sl_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvcvsxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_sl_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: xvcvsxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %0 = sitofp <2 x i64> %a to <2 x double> + ret <2 x double> %0 +} + +; Function Attrs: norecurse nounwind readnone +define <2 x double> @test_ul_to_d(<2 x i64> %a) { +; CHECK-LABEL: test_ul_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltd 34, 34, 1 +; CHECK-NEXT: xvcvuxddp 34, 34 +; CHECK-NEXT: blr +; +; CHECK-PWR9-LABEL: test_ul_to_d: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: xxspltd 34, 34, 1 +; CHECK-PWR9-NEXT: xvcvuxddp 34, 34 +; CHECK-PWR9-NEXT: blr +entry: + %vecext = extractelement <2 x i64> %a, i32 0 + %conv = uitofp i64 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecinit3 = shufflevector <2 x double> %vecinit, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %vecinit3 +}