diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -97,6 +97,11 @@ /// XXSPLT, + /// XXSPLTI_SP_TO_DP - The PPC VSX immediate-splat instruction that takes a + /// single precision immediate, converts it to double precision, and splats + /// it into a vector or scalar. + XXSPLTI_SP_TO_DP, + /// VECINSERT - The PPC vector insert instruction /// VECINSERT, @@ -1273,6 +1278,9 @@ bool isIntS16Immediate(SDNode *N, int16_t &Imm); bool isIntS16Immediate(SDValue Op, int16_t &Imm); + bool convertToNonDenormSingle(APInt &ArgAPInt); + bool convertToNonDenormSingle(APFloat &ArgAPFloat); + } // end namespace llvm #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1473,6 +1473,7 @@ case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; + case PPCISD::XXSPLTI_SP_TO_DP: return "PPCISD::XXSPLTI_SP_TO_DP"; case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; @@ -8948,19 +8949,21 @@ // Vector related lowering. // -/// BuildSplatI - Build a canonical splati of Val with an element size of -/// SplatSize. Cast the result to VT. -static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, - SelectionDAG &DAG, const SDLoc &dl) { +/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an +/// element size of SplatSize. Cast the result to VT. +static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, + SelectionDAG &DAG, const SDLoc &dl) { static const MVT VTys[] = { // canonical VT to use for each size. MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 }; EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; - // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. - if (Val == -1) + // For a splat with all ones, turn it into vspltisb 0xFF to canonicalize. + if (Val == ((1LLU << (SplatSize * 8)) - 1)) { SplatSize = 1; + Val = 0xFF; + } EVT CanonicalVT = VTys[SplatSize-1]; @@ -9095,6 +9098,34 @@ return ISD::isNormalLoad(LD) ? InputLoad : nullptr; } +// Convert the argument APFloat to single precision if the conversion loses no +// information and the resulting single precision value is not a denormal. +// Return true if successful. +bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) { + APFloat APFloatToConvert = ArgAPFloat; + bool LosesInfo = true; + APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, + &LosesInfo); + bool Success = (!LosesInfo && !APFloatToConvert.isDenormal()); + if (Success) + ArgAPFloat = APFloatToConvert; + return Success; +} + +// Bitcast the argument APInt to a double and try to convert it to a single +// precision APFloat. If the conversion loses no information and the result is +// not a denormal, bitcast the single precision APFloat back to an APInt and +// assign it to the original argument; otherwise leave the argument unchanged. +// Return true if successful. 
+bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) { + double DpValue = ArgAPInt.bitsToDouble(); + APFloat APFloatDp(DpValue); + bool Success = convertToNonDenormSingle(APFloatDp); + if (Success) + ArgAPInt = APFloatDp.bitcastToAPInt(); + return Success; +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen @@ -9214,9 +9245,23 @@ APInt APSplatBits, APSplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || - SplatBitSize > 32) { + bool BVNIsConstantSplat = + BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, 0, !Subtarget.isLittleEndian()); + + // If it is a splat of a double, check if we can shrink it to a 32 bit + // non-denormal float which when converted back to double gives us the same + // double. This is to exploit the XXSPLTIDP instruction. + if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() && + (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) && + convertToNonDenormSingle(APSplatBits)) { + SDValue SplatNode = DAG.getNode( + PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, + DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); + return DAG.getBitcast(Op.getValueType(), SplatNode); + } + + if (!BVNIsConstantSplat || SplatBitSize > 32) { const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0)); // Handle load-and-splat patterns as we have instructions that will do this @@ -9255,8 +9300,8 @@ return SDValue(); } - unsigned SplatBits = APSplatBits.getZExtValue(); - unsigned SplatUndef = APSplatUndef.getZExtValue(); + uint64_t SplatBits = APSplatBits.getZExtValue(); + uint64_t SplatUndef = APSplatUndef.getZExtValue(); unsigned SplatSize = SplatBitSize / 8; // First, handle single instruction cases. @@ -9271,17 +9316,33 @@ return Op; } - // We have XXSPLTIB for constant splats one byte wide + // We have XXSPLTIW for constant splats four bytes wide. + // Given vector length is a multiple of 4, 2-byte splats can be replaced + // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to + // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be + // turned into a 4-byte splat of 0xABABABAB. + if (Subtarget.hasPrefixInstrs() && SplatSize == 2) + return getCanonicalConstSplat((SplatBits |= SplatBits << 16), + SplatSize * 2, Op.getValueType(), DAG, dl); + + if (Subtarget.hasPrefixInstrs() && SplatSize == 4) + return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), + DAG, dl); + + // We have XXSPLTIB for constant splats one byte wide. // FIXME: SplatBits is an unsigned int being cast to an int while passing it - // as an argument to BuildSplatiI. Given SplatSize == 1 it is okay here. + // as an argument to getCanonicalConstSplat. Given SplatSize == 1 it is okay + // in this case. if (Subtarget.hasP9Vector() && SplatSize == 1) - return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl); + return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG, + dl); // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> (32-SplatBitSize)); if (SextVal >= -16 && SextVal <= 15) - return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); + return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG, + dl); // Two instruction sequences. @@ -9312,7 +9373,7 @@ // for fneg/fabs. if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { // Make -1 and vspltisw -1: - SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); + SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl); // Make the VSLW intrinsic, computing 0x8000_0000. SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, @@ -9340,7 +9401,7 @@ // vsplti + shl self. if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, Intrinsic::ppc_altivec_vslw @@ -9351,7 +9412,7 @@ // vsplti + srl self. if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, Intrinsic::ppc_altivec_vsrw @@ -9362,7 +9423,7 @@ // vsplti + sra self. if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, Intrinsic::ppc_altivec_vsraw @@ -9374,7 +9435,7 @@ // vsplti + rol self. if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, Intrinsic::ppc_altivec_vrlw @@ -9385,19 +9446,19 @@ // t = vsplti c, result = vsldoi t, t, 1 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { - SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { - SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { - SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 
13 : 3; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } @@ -10799,9 +10860,9 @@ if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); - SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); - SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. - + SDValue Zero = getCanonicalConstSplat( 0, 1, MVT::v4i32, DAG, dl); + // +16 as shift amt. + SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl); SDValue RHSSwap = // = vrlw RHS, 16 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); @@ -16220,7 +16281,14 @@ // false. Examples: f16, f80. return false; case MVT::f32: - case MVT::f64: + case MVT::f64: { + if (!Subtarget.hasPrefixInstrs()) + return Imm.isPosZero(); + else { + APFloat APFloatOfImm = Imm; + return convertToNonDenormSingle(APFloatOfImm); + } + } case MVT::ppcf128: return Imm.isPosZero(); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -50,6 +50,10 @@ SDTCisVec<1>, SDTCisInt<2> ]>; +def SDT_PPCSpToDp : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>, + SDTCisInt<1> +]>; + def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> ]>; @@ -194,6 +198,7 @@ def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; +def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>; def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>; def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; @@ -326,6 +331,23 @@ // PowerPC specific transformation functions and pattern fragments. // +// A floating point immediate that is not a positive zero and can be converted +// to a single precision floating point non-denormal immediate without loss of +// information. +def nzFPImmAsi32 : PatLeaf<(fpimm), [{ + APFloat APFloatOfN = N->getValueAPF(); + return convertToNonDenormSingle(APFloatOfN) && !N->isExactlyValue(+0.0); +}]>; + +// Convert the floating point immediate into a 32 bit floating point immediate +// and get an i32 with the resulting bits. +def getFPAs32BitInt : SDNodeXForm<fpimm, [{ + APFloat APFloatOfN = N->getValueAPF(); + convertToNonDenormSingle(APFloatOfN); + return CurDAG->getTargetConstant(APFloatOfN.bitcastToAPInt().getZExtValue(), + SDLoc(N), MVT::i32); +}]>; + def SHL32 : SDNodeXForm<imm, [{ // Transformation function: 31 - imm return getI32Imm(31 - N->getZExtValue(), SDLoc(N)); @@ -392,6 +414,7 @@ def immNonAllOneAnyExt8 : ImmLeaf<i32, [{ return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF)); }]>; +def i32immNonAllOneNonZero : ImmLeaf<i32, [{ return Imm && (Imm != -1); }]>; def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>; // imm16Shifted* - These match immediates where the low 16-bits are zero. 
There diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -704,7 +704,8 @@ def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT), (ins i32imm:$IMM32), "xxspltidp $XT, $IMM32", IIC_VecGeneral, - []>; + [(set v2f64:$XT, + (PPCxxspltidp i32:$IMM32))]>; def XXSPLTI32DX : 8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT), (ins vsrc:$XTi, i1imm:$IX, i32imm:$IMM32), @@ -817,3 +818,17 @@ def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)), (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>; } + +let AddedComplexity = 400, Predicates = [PrefixInstrs] in { + def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A, + i32immNonAllOneNonZero:$A, + i32immNonAllOneNonZero:$A, + i32immNonAllOneNonZero:$A)), + (v4i32 (XXSPLTIW imm:$A))>; + def : Pat<(f32 nzFPImmAsi32:$A), + (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)), + VSFRC)>; + def : Pat<(f64 nzFPImmAsi32:$A), + (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)), + VSFRC)>; +} diff --git a/llvm/test/CodeGen/PowerPC/power10-immediate-moves-and-splats.ll b/llvm/test/CodeGen/PowerPC/power10-immediate-moves-and-splats.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/power10-immediate-moves-and-splats.ll @@ -0,0 +1,526 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P10 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P10-BE + +define dso_local <4 x i32> @testZero() local_unnamed_addr { +; CHECK-P10-LABEL: testZero: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testZero: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x i32> zeroinitializer +} + +define dso_local <4 x float> @testZeroF() local_unnamed_addr { +; CHECK-P10-LABEL: testZeroF: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testZeroF: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x float> zeroinitializer +} + +define dso_local <4 x i32> @testAllOneS() local_unnamed_addr { +; CHECK-P10-LABEL: testAllOneS: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxleqv vs34, vs34, vs34 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testAllOneS: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxleqv vs34, vs34, vs34 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x i32> +} + +define dso_local <4 x i32> @test5Bit() local_unnamed_addr { +; CHECK-P10-LABEL: test5Bit: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: vspltisw v2, 7 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: test5Bit: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: vspltisw v2, 7 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x i32> +} + +define dso_local <16 x i8> @test1ByteChar() local_unnamed_addr { +; CHECK-P10-LABEL: test1ByteChar: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltib vs34, 7 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: test1ByteChar: +; CHECK-P10-BE: # 
%bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltib vs34, 7 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <16 x i8> +} + +define dso_local <4 x i32> @test1ByteSplatInt() local_unnamed_addr { +; Here the splat of 171 or 0xABABABAB can be done using a byte splat +; of 0xAB using xxspltib while avoiding the use of xxspltiw. +; CHECK-P10-LABEL: test1ByteSplatInt: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltib vs34, 171 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: test1ByteSplatInt: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltib vs34, 171 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x i32> +} + +define dso_local <4 x i32> @test5Bit2Ins() local_unnamed_addr { +; Splats within the range [-32,31] can be done using two vsplti[bhw] +; instructions, but we prefer the xxspltiw instruction to them. +; CHECK-P10-LABEL: test5Bit2Ins: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltiw vs34, 16 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: test5Bit2Ins: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltiw vs34, 16 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x i32> +} + +define dso_local <4 x float> @testFloatNegZero() local_unnamed_addr { +; 0.0f is not the same as -0.0f. We try to splat -0.0f +; CHECK-P10-LABEL: testFloatNegZero: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltiw vs34, -2147483648 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloatNegZero: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltiw vs34, -2147483648 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x float> +} + +define dso_local <4 x float> @testFloat() local_unnamed_addr { +; CHECK-P10-LABEL: testFloat: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltiw vs34, 1135323709 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloat: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltiw vs34, 1135323709 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x float> +} + +define dso_local <4 x float> @testIntToFloat() local_unnamed_addr { +; CHECK-P10-LABEL: testIntToFloat: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltiw vs34, 1135312896 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testIntToFloat: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltiw vs34, 1135312896 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x float> +} + +define dso_local <4 x i32> @testUndefInt() local_unnamed_addr { +; CHECK-P10-LABEL: testUndefInt: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltiw vs34, 18 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testUndefInt: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltiw vs34, 18 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x i32> +} + +define dso_local <4 x float> @testUndefIntToFloat() local_unnamed_addr { +; CHECK-P10-LABEL: testUndefIntToFloat: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltiw vs34, 1135312896 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testUndefIntToFloat: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltiw vs34, 1135312896 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <4 x float> +} + +define dso_local <2 x i64> @testPseudo8Byte() local_unnamed_addr { +; CHECK-P10-LABEL: testPseudo8Byte: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltiw vs34, -1430532899 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testPseudo8Byte: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltiw vs34, -1430532899 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <2 x i64> +} + +define dso_local <8 x i16> @test2Byte() local_unnamed_addr 
{ +; CHECK-P10-LABEL: test2Byte: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltiw vs34, 1179666 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: test2Byte: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltiw vs34, 1179666 +; CHECK-P10-BE-NEXT: blr + entry: + ret <8 x i16> +} + +define dso_local <8 x i16> @test2ByteUndef() local_unnamed_addr { +; CHECK-P10-LABEL: test2ByteUndef: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltiw vs34, 1179666 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: test2ByteUndef: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltiw vs34, 1179666 +; CHECK-P10-BE-NEXT: blr + entry: + ret <8 x i16> +} + +define dso_local <2 x double> @testFloatToDouble() local_unnamed_addr { +; CHECK-P10-LABEL: testFloatToDouble: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltidp vs34, 1135290941 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloatToDouble: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltidp vs34, 1135290941 +; CHECK-P10-BE-NEXT: blr + entry: + ret <2 x double> +} + +define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr { +; CHECK-P10-LABEL: testDoubleToDoubleFail: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: plxv vs34, .LCPI16_0@PCREL(0), 1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleToDoubleFail: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha +; CHECK-P10-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l +; CHECK-P10-BE-NEXT: lxvx vs34, 0, r3 +; CHECK-P10-BE-NEXT: blr + entry: + ret <2 x double> +} + +define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr { +; CHECK-P10-LABEL: testFloatDenormToDouble: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: plxv vs34, .LCPI17_0@PCREL(0), 1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloatDenormToDouble: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha +; CHECK-P10-BE-NEXT: addi r3, r3, .LCPI17_0@toc@l +; CHECK-P10-BE-NEXT: lxvx vs34, 0, r3 +; CHECK-P10-BE-NEXT: blr + entry: + ret <2 x double> +} + +define dso_local <2 x double> @testDoubleLower4ByteZero() local_unnamed_addr { +; The expanded double has 0 in its low 32 bits. Truncating the value returned +; by APInt::getZExtValue(), for example by storing it in an unsigned instead of +; a uint64_t, would keep only those zero low bits and cause this test to +; fail. +; CHECK-P10-LABEL: testDoubleLower4ByteZero: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltidp vs34, 1093664768 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleLower4ByteZero: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltidp vs34, 1093664768 +; CHECK-P10-BE-NEXT: blr + entry: + ret <2 x double> +} + +define dso_local <2 x double> @testDoubleToDoubleZero() local_unnamed_addr { +; Splatting zero should keep using the canonical xxlxor sequence, which is +; shorter than xxspltidp. 
+; CHECK-P10-LABEL: testDoubleToDoubleZero: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleToDoubleZero: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <2 x double> zeroinitializer +} + +define dso_local <2 x double> @testDoubleToDoubleNegZero() local_unnamed_addr { +; CHECK-P10-LABEL: testDoubleToDoubleNegZero: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltidp vs34, -2147483648 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleToDoubleNegZero: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltidp vs34, -2147483648 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <2 x double> +} + +define dso_local <2 x double> @testDoubleToDoubleNaN() local_unnamed_addr { +; CHECK-P10-LABEL: testDoubleToDoubleNaN: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltidp vs34, -16 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleToDoubleNaN: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltidp vs34, -16 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <2 x double> +} + +define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr { +; CHECK-P10-LABEL: testDoubleToDoubleNaNFail: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: plxv vs34, .LCPI22_0@PCREL(0), 1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, .LCPI22_0@toc@ha +; CHECK-P10-BE-NEXT: addi r3, r3, .LCPI22_0@toc@l +; CHECK-P10-BE-NEXT: lxvx vs34, 0, r3 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <2 x double> +} + +define dso_local <2 x double> @testDoubleToDoubleInfinity() local_unnamed_addr { +; CHECK-P10-LABEL: testDoubleToDoubleInfinity: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltidp vs34, 2139095040 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleToDoubleInfinity: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltidp vs34, 2139095040 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <2 x double> +} + +define dso_local <2 x double> @testFloatToDoubleNaN() local_unnamed_addr { +; CHECK-P10-LABEL: testFloatToDoubleNaN: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltidp vs34, -1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloatToDoubleNaN: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltidp vs34, -1 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <2 x double> +} + +define dso_local <2 x double> @testFloatToDoubleInfinity() local_unnamed_addr { +; CHECK-P10-LABEL: testFloatToDoubleInfinity: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltidp vs34, 2139095040 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloatToDoubleInfinity: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltidp vs34, 2139095040 +; CHECK-P10-BE-NEXT: blr + +entry: + ret <2 x double> +} + +define dso_local float @testFloatScalar() local_unnamed_addr { +; CHECK-P10-LABEL: testFloatScalar: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltidp vs1, 1135290941 +; CHECK-P10-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloatScalar: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltidp vs1, 1135290941 +; CHECK-P10-BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-P10-BE-NEXT: blr + +entry: + ret float 0x40756547A0000000 +} + +define dso_local float @testFloatZeroScalar() local_unnamed_addr { +; 
CHECK-P10-LABEL: testFloatZeroScalar: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxlxor f1, f1, f1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloatZeroScalar: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxlxor f1, f1, f1 +; CHECK-P10-BE-NEXT: blr + +entry: + ret float 0.000000e+00 +} + +define dso_local double @testDoubleRepresentableScalar() local_unnamed_addr { +; CHECK-P10-LABEL: testDoubleRepresentableScalar: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxspltidp vs1, 1135290941 +; CHECK-P10-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleRepresentableScalar: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxspltidp vs1, 1135290941 +; CHECK-P10-BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-P10-BE-NEXT: blr + +entry: + ret double 0x40756547A0000000 +} + +define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr { +; CHECK-P10-LABEL: testDoubleNonRepresentableScalar: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: plfd f1, .LCPI29_0@PCREL(0), 1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, .LCPI29_0@toc@ha +; CHECK-P10-BE-NEXT: lfd f1, .LCPI29_0@toc@l(r3) +; CHECK-P10-BE-NEXT: blr + +entry: + ret double 3.423300e+02 +} + +define dso_local float @testFloatDenormScalar() local_unnamed_addr { +; CHECK-P10-LABEL: testFloatDenormScalar: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: plfs f1, .LCPI30_0@PCREL(0), 1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloatDenormScalar: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, .LCPI30_0@toc@ha +; CHECK-P10-BE-NEXT: lfs f1, .LCPI30_0@toc@l(r3) +; CHECK-P10-BE-NEXT: blr + +entry: + ret float 0x380B38FB80000000 +} + +define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr { +; CHECK-P10-LABEL: testFloatDenormToDoubleScalar: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: plfs f1, .LCPI31_0@PCREL(0), 1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: addis r3, r2, .LCPI31_0@toc@ha +; CHECK-P10-BE-NEXT: lfs f1, .LCPI31_0@toc@l(r3) +; CHECK-P10-BE-NEXT: blr + +entry: + ret double 0x380B38FB80000000 +} + +define dso_local double @testDoubleZeroScalar() local_unnamed_addr { +; CHECK-P10-LABEL: testDoubleZeroScalar: +; CHECK-P10: # %bb.0: # %entry +; CHECK-P10-NEXT: xxlxor f1, f1, f1 +; CHECK-P10-NEXT: blr +; +; CHECK-P10-BE-LABEL: testDoubleZeroScalar: +; CHECK-P10-BE: # %bb.0: # %entry +; CHECK-P10-BE-NEXT: xxlxor f1, f1, f1 +; CHECK-P10-BE-NEXT: blr + +entry: + ret double 0.000000e+00 +}
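For readers who want to see the core check in isolation: the sketch below mirrors what convertToNonDenormSingle decides (can a 64-bit double constant be reproduced exactly from a non-denormal single, so that XXSPLTIDP can materialize it from a 32-bit immediate?), but it uses plain C++ instead of llvm::APFloat. The helper name canBeXXSPLTIDPImm and the sample values are illustrative only and are not part of the patch; NaN payloads are ignored here, whereas the APFloat-based code covers them through the LosesInfo flag of APFloat::convert.

// Standalone illustration (not part of the patch).
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Returns true if D is exactly representable as a non-denormal single
// precision value, and produces the 32-bit image that xxspltidp would splat
// and widen back to double in each doubleword of the target register.
static bool canBeXXSPLTIDPImm(double D, uint32_t &Imm32) {
  float F = static_cast<float>(D);        // round to single precision
  if (static_cast<double>(F) != D)        // information lost (or D is a NaN)
    return false;
  if (std::fpclassify(F) == FP_SUBNORMAL) // denormal singles are rejected
    return false;
  std::memcpy(&Imm32, &F, sizeof Imm32);  // bitcast float -> i32 immediate
  return true;
}

int main() {
  uint32_t Imm;
  // 342.0 converts exactly: 0x43ab0000, i.e. 1135312896 as in testIntToFloat.
  if (canBeXXSPLTIDPImm(342.0, Imm))
    std::printf("342.0  -> xxspltidp 0x%08x\n", Imm);
  // 342.33 has no exact single precision form: falls back to a load
  // (cf. testDoubleNonRepresentableScalar).
  std::printf("342.33 -> %s\n", canBeXXSPLTIDPImm(342.33, Imm) ? "imm" : "load");
  // 2^-127 survives the round trip but is a denormal single, so it is
  // also rejected (cf. testFloatDenormToDouble).
  std::printf("2^-127 -> %s\n",
              canBeXXSPLTIDPImm(std::ldexp(1.0, -127), Imm) ? "imm" : "load");
  return 0;
}

The same round-trip-without-loss idea is what lets the BUILD_VECTOR lowering above emit XXSPLTI_SP_TO_DP for v2f64 splats and what the nzFPImmAsi32 PatLeaf checks for scalar f32/f64 immediates.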