diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -97,6 +97,11 @@
       /// XXSPLT,
 
+      /// XXSPLTI_SP_TO_DP - The PPC VSX splat instruction for immediates. It
+      /// converts a single precision immediate into a splatted double
+      /// precision vector or scalar.
+      XXSPLTI_SP_TO_DP,
+
       /// VECINSERT - The PPC vector insert instruction
       ///
       VECINSERT,
 
@@ -1273,6 +1278,9 @@
   bool isIntS16Immediate(SDNode *N, int16_t &Imm);
   bool isIntS16Immediate(SDValue Op, int16_t &Imm);
 
+  bool convertToNonDenormSingle(APInt &ArgAPInt);
+  bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+
 } // end namespace llvm
 
 #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1473,6 +1473,8 @@
   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
   case PPCISD::VPERM:           return "PPCISD::VPERM";
   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
+  case PPCISD::XXSPLTI_SP_TO_DP:
+    return "PPCISD::XXSPLTI_SP_TO_DP";
   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
@@ -8966,19 +8968,21 @@
 // Vector related lowering.
 //
 
-/// BuildSplatI - Build a canonical splati of Val with an element size of
-/// SplatSize. Cast the result to VT.
-static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
-                           SelectionDAG &DAG, const SDLoc &dl) {
+/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
+/// element size of SplatSize. Cast the result to VT.
+static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
+                                      SelectionDAG &DAG, const SDLoc &dl) {
   static const MVT VTys[] = { // canonical VT to use for each size.
     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
   };
 
   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
 
-  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
-  if (Val == -1)
+  // Canonicalize an all-ones splat of any element size to vspltisb 0xFF.
+  // Use 1LLU so the shift is well defined even when SplatSize is 4 and
+  // unsigned long is only 32 bits wide.
+  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
     SplatSize = 1;
+    Val = 0xFF;
+  }
 
   EVT CanonicalVT = VTys[SplatSize-1];
 
@@ -9113,6 +9117,34 @@
   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
 }
 
+// Convert the argument APFloat to single precision. Return true (and update
+// the argument in place) only if the conversion loses no information and the
+// result is not a denormal number.
+bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
+  APFloat APFloatToConvert = ArgAPFloat;
+  bool LosesInfo = true;
+  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+                           &LosesInfo);
+  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
+  if (Success)
+    ArgAPFloat = APFloatToConvert;
+  return Success;
+}
+
+// Bitcast the argument APInt to a double and try to convert it to single
+// precision. If the conversion loses no information and the result is not a
+// denormal number, assign the single precision bit pattern back to the
+// argument and return true; otherwise leave the argument unchanged and
+// return false.
+bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
+  double DpValue = ArgAPInt.bitsToDouble();
+  APFloat APFloatDp(DpValue);
+  bool Success = convertToNonDenormSingle(APFloatDp);
+  if (Success)
+    ArgAPInt = APFloatDp.bitcastToAPInt();
+  return Success;
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -9232,9 +9264,23 @@
   APInt APSplatBits, APSplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
-                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
-      SplatBitSize > 32) {
+  bool BVNIsConstantSplat =
+      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());
+
+  // If this is a splat of a double, check whether we can shrink it to a
+  // 32-bit non-denormal float that converts back to the same double. If so,
+  // we can exploit the XXSPLTIDP instruction.
+  if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
+      (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
+      convertToNonDenormSingle(APSplatBits)) {
+    SDValue SplatNode = DAG.getNode(
+        PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+        DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+    return DAG.getBitcast(Op.getValueType(), SplatNode);
+  }
+
+  if (!BVNIsConstantSplat || SplatBitSize > 32) {
 
     const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
     // Handle load-and-splat patterns as we have instructions that will do this
@@ -9273,8 +9319,8 @@
     return SDValue();
   }
 
-  unsigned SplatBits = APSplatBits.getZExtValue();
-  unsigned SplatUndef = APSplatUndef.getZExtValue();
+  uint64_t SplatBits = APSplatBits.getZExtValue();
+  uint64_t SplatUndef = APSplatUndef.getZExtValue();
   unsigned SplatSize = SplatBitSize / 8;
 
   // First, handle single instruction cases.
@@ -9289,17 +9335,30 @@
     return Op;
   }
 
-  // We have XXSPLTIB for constant splats one byte wide
-  // FIXME: SplatBits is an unsigned int being cast to an int while passing it
-  // as an argument to BuildSplatiI. Given SplatSize == 1 it is okay here.
+  // We have XXSPLTIW for constant splats four bytes wide.
+  // Since the vector length is a multiple of 4, 2-byte splats can be replaced
+  // with 4-byte splats by replicating SplatBits into a 4-byte element. For
+  // example, a 2-byte splat of 0xABAB becomes a 4-byte splat of 0xABABABAB.
+  if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
+    return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
+                                  Op.getValueType(), DAG, dl);
+
+  if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
+    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
+                                  dl);
+
+  // We have XXSPLTIB for constant splats one byte wide.
   if (Subtarget.hasP9Vector() && SplatSize == 1)
-    return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);
+    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
+                                  dl);
 
   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
   if (SextVal >= -16 && SextVal <= 15)
-    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+    return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
+                                  dl);
 
   // Two instruction sequences.
 
@@ -9330,7 +9389,7 @@
   // for fneg/fabs.
if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { // Make -1 and vspltisw -1: - SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); + SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl); // Make the VSLW intrinsic, computing 0x8000_0000. SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, @@ -9358,7 +9417,7 @@ // vsplti + shl self. if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, Intrinsic::ppc_altivec_vslw @@ -9369,7 +9428,7 @@ // vsplti + srl self. if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, Intrinsic::ppc_altivec_vsrw @@ -9380,7 +9439,7 @@ // vsplti + sra self. if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, Intrinsic::ppc_altivec_vsraw @@ -9392,7 +9451,7 @@ // vsplti + rol self. if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, Intrinsic::ppc_altivec_vrlw @@ -9403,19 +9462,19 @@ // t = vsplti c, result = vsldoi t, t, 1 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { - SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { - SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { - SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } @@ -10817,9 +10876,9 @@ if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); - SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); - SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. - + SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl); + // +16 as shift amt. 
+    SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
     SDValue RHSSwap =   // = vrlw RHS, 16
       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
 
@@ -16239,6 +16298,13 @@
     return false;
   case MVT::f32:
   case MVT::f64:
+    if (Subtarget.hasPrefixInstrs()) {
+      // With prefixed instructions, we can materialize anything that can be
+      // represented with a 32-bit immediate, not just positive zero.
+      APFloat APFloatOfImm = Imm;
+      return convertToNonDenormSingle(APFloatOfImm);
+    }
+    LLVM_FALLTHROUGH;
   case MVT::ppcf128:
     return Imm.isPosZero();
   }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -50,6 +50,10 @@
   SDTCisVec<1>, SDTCisInt<2>
 ]>;
+def SDT_PPCSpToDp : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>,
+  SDTCisInt<1>
+]>;
+
 def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
   SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
 ]>;
@@ -194,6 +198,7 @@
 def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
 def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
+def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>;
 def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
 def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
 def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
@@ -326,6 +331,23 @@
 // PowerPC specific transformation functions and pattern fragments.
 //
 
+// A floating point immediate that is not a positive zero and can be converted
+// to a single precision floating point non-denormal immediate without loss of
+// information.
+def nzFPImmAsi32 : PatLeaf<(fpimm), [{
+  APFloat APFloatOfN = N->getValueAPF();
+  return convertToNonDenormSingle(APFloatOfN) && !N->isExactlyValue(+0.0);
+}]>;
+
+// Convert the floating point immediate into a 32-bit floating point immediate
+// and return an i32 with the resulting bits.
+def getFPAs32BitInt : SDNodeXForm<fpimm, [{
+  APFloat APFloatOfN = N->getValueAPF();
+  convertToNonDenormSingle(APFloatOfN);
+  return CurDAG->getTargetConstant(APFloatOfN.bitcastToAPInt().getZExtValue(),
+                                   SDLoc(N), MVT::i32);
+}]>;
+
 def SHL32 : SDNodeXForm<imm, [{
   // Transformation function: 31 - imm
   return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
 }]>;
@@ -392,6 +414,7 @@
 def immNonAllOneAnyExt8 : ImmLeaf<i32, [{
   return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF));
 }]>;
+def i32immNonAllOneNonZero : ImmLeaf<i32, [{ return Imm && (Imm != -1); }]>;
 def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
 
 // imm16Shifted* - These match immediates where the low 16-bits are zero.  There
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -704,7 +704,8 @@
   def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
                                       (ins i32imm:$IMM32),
                                       "xxspltidp $XT, $IMM32", IIC_VecGeneral,
-                                      []>;
+                                      [(set v2f64:$XT,
+                                            (PPCxxspltidp i32:$IMM32))]>;
   def XXSPLTI32DX :
     8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
                            (ins vsrc:$XTi, i1imm:$IX, i32imm:$IMM32),
@@ -822,3 +823,17 @@
   def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
             (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
 }
+
+let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
+  def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
+                                 i32immNonAllOneNonZero:$A,
+                                 i32immNonAllOneNonZero:$A,
+                                 i32immNonAllOneNonZero:$A)),
+            (v4i32 (XXSPLTIW imm:$A))>;
+  def : Pat<(f32 nzFPImmAsi32:$A),
+            (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
+                              VSFRC)>;
+  def : Pat<(f64 nzFPImmAsi32:$A),
+            (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
+                              VSFRC)>;
+}
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-NOPCREL
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \
+; RUN:   FileCheck %s
+
+define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr {
+; CHECK-LABEL: testDoubleToDoubleFail:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    plxv vs34, .LCPI0_0@PCREL(0), 1
+; CHECK-NEXT:    blr
+;
+; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPCREL:       # %bb.0: # %entry
+; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPCREL-NEXT:    blr
+
+entry:
+  ret <2 x double> <double 3.423300e+02, double 3.423300e+02>
+}
+
+define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr {
+; CHECK-LABEL: testFloatDenormToDouble:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    plxv vs34, .LCPI1_0@PCREL(0), 1
+; CHECK-NEXT:    blr
+;
+; CHECK-NOPCREL-LABEL: testFloatDenormToDouble:
+; CHECK-NOPCREL:       # %bb.0: # %entry
+; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPCREL-NEXT:    blr
+
+entry:
+  ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000>
+}
+
+define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
+; CHECK-LABEL: testDoubleToDoubleNaNFail:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    plxv vs34, .LCPI2_0@PCREL(0), 1
+; CHECK-NEXT:    blr
+;
+; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPCREL:       # %bb.0: # %entry
+; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPCREL-NEXT:    blr
+
+entry:
+  ret <2 x double> <double 0x7FF8000000000001, double 0x7FF8000000000001>
+}
+
+define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
+; CHECK-LABEL: testDoubleNonRepresentableScalar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-NEXT:    blr
+;
+; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPCREL:       # %bb.0: # %entry
+; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPCREL-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-NEXT:    blr
+
+entry:
+  ret double 3.423300e+02
+}
+
+define dso_local float @testFloatDenormScalar() local_unnamed_addr {
+; CHECK-LABEL: testFloatDenormScalar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-NEXT:    blr
+;
+; CHECK-NOPCREL-LABEL: testFloatDenormScalar:
+; CHECK-NOPCREL:       # %bb.0: # %entry
+; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-NEXT:    blr
+
+entry:
+  ret float 0x380B38FB80000000
+}
+
+define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
+; CHECK-LABEL: testFloatDenormToDoubleScalar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-NEXT:    blr
+;
+; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPCREL:       # %bb.0: # %entry
+; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-NEXT:    blr
+
+entry:
+  ret double 0x380B38FB80000000
+}
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+
+define dso_local <4 x i32> @testZero() local_unnamed_addr {
+; CHECK-LABEL: testZero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x i32> zeroinitializer
+}
+
+define dso_local <4 x float> @testZeroF() local_unnamed_addr {
+; CHECK-LABEL: testZeroF:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x float> zeroinitializer
+}
+
+define dso_local <4 x i32> @testAllOneS() local_unnamed_addr {
+; CHECK-LABEL: testAllOneS:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxleqv vs34, vs34, vs34
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define dso_local <4 x i32> @test5Bit() local_unnamed_addr {
+; CHECK-LABEL: test5Bit:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vspltisw v2, 7
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+}
+
+define dso_local <16 x i8> @test1ByteChar() local_unnamed_addr {
+; CHECK-LABEL: test1ByteChar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltib vs34, 7
+; CHECK-NEXT:    blr
+
+entry:
+  ret <16 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+}
+
+define dso_local <4 x i32> @test1ByteSplatInt() local_unnamed_addr {
+; The word splat of 0xABABABAB (each byte 171) can be done with a byte splat
+; of 0xAB using xxspltib, avoiding the need for xxspltiw.
+; CHECK-LABEL: test1ByteSplatInt:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltib vs34, 171
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x i32> <i32 -1414812757, i32 -1414812757, i32 -1414812757, i32 -1414812757>
+}
+
+define dso_local <4 x i32> @test5Bit2Ins() local_unnamed_addr {
+; Splats in the range [-32,31] can be done with two vsplti[bhw] instructions,
+; but we prefer the single xxspltiw instruction.
+; CHECK-LABEL: test5Bit2Ins:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw vs34, 16
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x i32> <i32 16, i32 16, i32 16, i32 16>
+}
+
+define dso_local <4 x float> @testFloatNegZero() local_unnamed_addr {
+; 0.0f is not the same as -0.0f; here we splat -0.0f.
+; CHECK-LABEL: testFloatNegZero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw vs34, -2147483648
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
+}
+
+define dso_local <4 x float> @testFloat() local_unnamed_addr {
+; CHECK-LABEL: testFloat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw vs34, 1135323709
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x float> <float 0x40757547A0000000, float 0x40757547A0000000, float 0x40757547A0000000, float 0x40757547A0000000>
+}
+
+define dso_local <4 x float> @testIntToFloat() local_unnamed_addr {
+; CHECK-LABEL: testIntToFloat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw vs34, 1135312896
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x float> <float 3.430000e+02, float 3.430000e+02, float 3.430000e+02, float 3.430000e+02>
+}
+
+define dso_local <4 x i32> @testUndefInt() local_unnamed_addr {
+; CHECK-LABEL: testUndefInt:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw vs34, 18
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x i32> <i32 18, i32 undef, i32 18, i32 18>
+}
+
+define dso_local <4 x float> @testUndefIntToFloat() local_unnamed_addr {
+; CHECK-LABEL: testUndefIntToFloat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw vs34, 1135312896
+; CHECK-NEXT:    blr
+
+entry:
+  ret <4 x float> <float 3.430000e+02, float undef, float 3.430000e+02, float 3.430000e+02>
+}
+
+define dso_local <2 x i64> @testPseudo8Byte() local_unnamed_addr {
+; CHECK-LABEL: testPseudo8Byte:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw vs34, -1430532899
+; CHECK-NEXT:    blr
+
+entry:
+  ret <2 x i64> <i64 -6144092014192636707, i64 -6144092014192636707>
+}
+
+define dso_local <8 x i16> @test2Byte() local_unnamed_addr {
+; CHECK-LABEL: test2Byte:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw vs34, 1179666
+; CHECK-NEXT:    blr
+
+entry:
+  ret <8 x i16> <i16 18, i16 18, i16 18, i16 18, i16 18, i16 18, i16 18, i16 18>
+}
+
+define dso_local <8 x i16> @test2ByteUndef() local_unnamed_addr {
+; CHECK-LABEL: test2ByteUndef:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw vs34, 1179666
+; CHECK-NEXT:    blr
+
+entry:
+  ret <8 x i16> <i16 18, i16 undef, i16 18, i16 18, i16 18, i16 18, i16 18, i16 18>
+}
+
+define dso_local <2 x double> @testFloatToDouble() local_unnamed_addr {
+; CHECK-LABEL: testFloatToDouble:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp vs34, 1135290941
+; CHECK-NEXT:    blr
+
+entry:
+  ret <2 x double> <double 0x40756547A0000000, double 0x40756547A0000000>
+}
+
+define dso_local <2 x double> @testDoubleLower4ByteZero() local_unnamed_addr {
+; The expanded double has 0 in its lower 32 bits. Truncating the uint64_t
+; returned by getZExtValue() (for example, storing it into an unsigned)
+; would cause this test to fail.
+; CHECK-LABEL: testDoubleLower4ByteZero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp vs34, 1093664768
+; CHECK-NEXT:    blr
+
+entry:
+  ret <2 x double> <double 1.100000e+01, double 1.100000e+01>
+}
+
+define dso_local <2 x double> @testDoubleToDoubleZero() local_unnamed_addr {
+; Splatting zero should use the canonical form (xxlxor), which is cheaper
+; than xxspltidp.
+; CHECK-LABEL: testDoubleToDoubleZero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-NEXT:    blr
+
+entry:
+  ret <2 x double> zeroinitializer
+}
+
+define dso_local <2 x double> @testDoubleToDoubleNegZero() local_unnamed_addr {
+; CHECK-LABEL: testDoubleToDoubleNegZero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp vs34, -2147483648
+; CHECK-NEXT:    blr
+
+entry:
+  ret <2 x double> <double -0.000000e+00, double -0.000000e+00>
+}
+
+define dso_local <2 x double> @testDoubleToDoubleNaN() local_unnamed_addr {
+; CHECK-LABEL: testDoubleToDoubleNaN:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp vs34, -16
+; CHECK-NEXT:    blr
+
+entry:
+  ret <2 x double> <double 0xFFFFFFFE00000000, double 0xFFFFFFFE00000000>
+}
+
+define dso_local <2 x double> @testDoubleToDoubleInfinity() local_unnamed_addr {
+; CHECK-LABEL: testDoubleToDoubleInfinity:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp vs34, 2139095040
+; CHECK-NEXT:    blr
+
+entry:
+  ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
+}
+
+define dso_local <2 x double> @testFloatToDoubleNaN() local_unnamed_addr {
+; CHECK-LABEL: testFloatToDoubleNaN:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp vs34, -1
+; CHECK-NEXT:    blr
+
+entry:
+  ret <2 x double> <double 0xFFFFFFFFE0000000, double 0xFFFFFFFFE0000000>
+}
+
+define dso_local <2 x double> @testFloatToDoubleInfinity() local_unnamed_addr {
+; CHECK-LABEL: testFloatToDoubleInfinity:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp vs34, 2139095040
+; CHECK-NEXT:    blr
+
+entry:
+  ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
+}
+
+define dso_local float @testFloatScalar() local_unnamed_addr {
+; CHECK-LABEL: testFloatScalar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp vs1, 1135290941
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    blr
+
+entry:
+  ret float 0x40756547A0000000
+}
+
+define dso_local float @testFloatZeroScalar() local_unnamed_addr {
+; CHECK-LABEL: testFloatZeroScalar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlxor f1, f1, f1
+; CHECK-NEXT:    blr
+
+entry:
+  ret float 0.000000e+00
+}
+
+define dso_local double @testDoubleRepresentableScalar() local_unnamed_addr {
+; CHECK-LABEL: testDoubleRepresentableScalar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp vs1, 1135290941
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    blr
+
+entry:
+  ret double 0x40756547A0000000
+}
+
+define dso_local double @testDoubleZeroScalar() local_unnamed_addr {
+; CHECK-LABEL: testDoubleZeroScalar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlxor f1, f1, f1
+; CHECK-NEXT:    blr
+
+entry:
+  ret double 0.000000e+00
+}
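
Note (not part of the patch): the core eligibility check above is a round trip,
since a double can be materialized with XXSPLTIDP only if it converts to single
precision without losing information and without the result being denormal. The
following is a minimal standalone sketch of that check using llvm::APFloat; it
needs only LLVMSupport, and the helper name fitsInNonDenormSingle plus the
build command are illustrative assumptions, not part of the patch.

// Standalone sketch of the XXSPLTIDP eligibility check (assumption: built
// against LLVMSupport, e.g.
//   clang++ demo.cpp $(llvm-config --cxxflags --ldflags --libs support)).
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include <cstdio>

using namespace llvm;

// Mirrors llvm::convertToNonDenormSingle(APFloat &) from the patch, without
// the in-place update: true iff the value converts to single precision with
// no information loss and the result is not denormal.
static bool fitsInNonDenormSingle(APFloat F) {
  bool LosesInfo = true;
  F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
  return !LosesInfo && !F.isDenormal();
}

int main() {
  // 1.0 converts exactly to a normal single: XXSPLTIDP applies.
  printf("1.0      -> %d\n", fitsInNonDenormSingle(APFloat(1.0)));
  // 342.33 has mantissa bits beyond single precision, so the conversion
  // loses information and codegen falls back to a constant-pool load
  // (testDoubleToDoubleFail above).
  printf("342.33   -> %d\n", fitsInNonDenormSingle(APFloat(342.33)));
  // 0x380B38FB80000000 converts exactly, but only to a *denormal* single,
  // which XXSPLTIDP cannot encode (testFloatDenormToDouble above).
  APInt Bits(64, 0x380B38FB80000000ULL);
  printf("denormal -> %d\n",
         fitsInNonDenormSingle(APFloat(Bits.bitsToDouble())));
  return 0;
}

The same predicate drives both the v2f64 build_vector lowering and the f32/f64
isFPImmLegal hook, which is why the scalar and vector tests exercise matching
constants.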