Index: lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
===================================================================
--- lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -222,7 +222,11 @@
   unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16;
   const MCOperand &MO = MI.getOperand(OpNo);
-  if (MO.isImm())
+  // PPC::ZERO and PPC::ZERO8 are register MachineOperands, but they really
+  // represent immediates.
+  bool IsImm = MO.isImm() ||
+    (MO.isReg() && (MO.getReg() == PPC::ZERO8 || MO.getReg() == PPC::ZERO));
+  if (IsImm)
     return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
 
   // Add a fixup for the displacement field.
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -975,6 +975,9 @@
     unsigned combineRepeatedFPDivisors() const override;
 
     CCAssignFn *useFastISelCCs(unsigned Flag) const;
+
+    SDValue combineBVOfFpToIntToFpToIntOfBV(SDNode *N,
+                                            DAGCombinerInfo &DCI) const;
   };
 
   namespace PPC {
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -671,6 +671,10 @@
       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
 
+      if (Subtarget.hasDirectMove())
+        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+
       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
     }
 
@@ -683,9 +687,6 @@
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
     }
-
-    if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
-      setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
   }
 
   if (Subtarget.hasQPX()) {
@@ -7092,16 +7093,6 @@
   return DAG.getNode(ISD::BITCAST, dl, VT, T);
 }
 
-static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
-  if (BVN->isConstant() || BVN->getValueType(0) != Type)
-    return false;
-  auto OpZero = BVN->getOperand(0);
-  for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
-    if (BVN->getOperand(i) != OpZero)
-      return false;
-  return true;
-}
-
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -7224,14 +7215,45 @@
   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
       SplatBitSize > 32) {
-    // We can splat a non-const value on CPU's that implement ISA 3.0
-    // in two ways: LXVWSX (load and splat) and MTVSRWS(move and splat).
-    auto OpZero = BVN->getOperand(0);
-    bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
-      BVN->isOnlyUserOf(OpZero.getNode());
-    if (Subtarget.isISA3_0() && !CanLoadAndSplat &&
-        (isNonConstSplatBV(BVN, MVT::v4i32) ||
-         isNonConstSplatBV(BVN, MVT::v2i64)))
+    if (!Subtarget.hasVSX())
+      return SDValue();
+
+    // SDAG patterns are provided for building vectors out of values that are
+    // in registers.
+    bool RightType = Op.getValueType() == MVT::v2f64 ||
+      Op.getValueType() == MVT::v4f32 ||
+      (Op.getValueType() == MVT::v2i64 && Subtarget.hasDirectMove()) ||
+      (Op.getValueType() == MVT::v4i32 && Subtarget.hasDirectMove());
+
+    // We have efficient patterns for BUILD_VECTOR nodes whose inputs
+    // are non-constant and non-undef. Also, if this is a load-and-splat,
+    // it is better handled through (splat (scalar_to_vector)).
+    auto haveEfficientPattern = [&](BuildVectorSDNode *V) -> bool {
+      bool IsSplat = true;
+      bool IsLoad = false;
+      SDValue Op0 = V->getOperand(0);
+      if (V->isConstant())
+        return false;
+      for (int i = 0, e = V->getNumOperands(); i < e; i++) {
+        if (V->getOperand(i).isUndef())
+          return false;
+        if (V->getOperand(i).getOpcode() == ISD::LOAD ||
+            (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
+             V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
+            (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
+             V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
+            (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
+             V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
+          IsLoad = true;
+        // If the operands are different, or the input is not a load and has
+        // more uses than just this BV node, then it isn't a splat.
+        if (V->getOperand(i) != Op0 ||
+            (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
+          IsSplat = false;
+      }
+      return !(IsSplat && IsLoad);
+    };
+    if (RightType && haveEfficientPattern(BVN))
       return Op;
     return SDValue();
   }
@@ -7252,9 +7274,20 @@
     return Op;
   }
 
   // We have XXSPLTIB for constant splats one byte wide
-  if (Subtarget.isISA3_0() && Op.getValueType() == MVT::v16i8)
+  if (Subtarget.hasP9Vector() && SplatSize == 1) {
+    // This is a splat, except that some of the inputs may be undef. Convert
+    // it to a constant splat.
+    if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
+      SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits, dl,
+                                                       MVT::i32));
+      SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
+      if (Op.getValueType() != MVT::v16i8)
+        return DAG.getBitcast(Op.getValueType(), NewBV);
+      return NewBV;
+    }
     return Op;
+  }
 
   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
@@ -7502,7 +7535,7 @@
 
   // If the source for the shuffle is a scalar_to_vector that came from a
   // 32-bit load, it will have used LXVWSX so we don't need to splat again.
-  if (Subtarget.isISA3_0() &&
+  if (Subtarget.hasP9Vector() &&
       ((isLittleEndian && SplatIdx == 3) ||
        (!isLittleEndian && SplatIdx == 0))) {
     SDValue Src = V1.getOperand(0);
@@ -10505,6 +10538,164 @@
   ShiftCst);
 }
 
+/// combineBVOfFpToIntToFpToIntOfBV - If this node is a build_vector of
+/// fp-to-int conversions, reduce it to a single fp-to-int conversion of
+/// a build_vector of fp values.
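+/// For example,
+///   (build_vector (i32 (mfvsr (fctiwz $A))), (i32 (mfvsr (fctiwz $B))), ...)
+/// becomes
+///   (fp_to_sint (build_vector $A, $B, ...))
+/// (with an FP_ROUND on each input for the 32-bit conversions), so the
+/// conversion happens as a single vector operation.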
+SDValue
+PPCTargetLowering::combineBVOfFpToIntToFpToIntOfBV(SDNode *N,
+                                                   DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+
+  SDValue FirstInput = N->getOperand(0);
+  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
+         "The input operand must be an fp-to-int conversion.");
+  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
+  if (FirstConversion == PPCISD::FCTIDZ ||
+      FirstConversion == PPCISD::FCTIDUZ ||
+      FirstConversion == PPCISD::FCTIWZ ||
+      FirstConversion == PPCISD::FCTIWUZ) {
+    bool IsSplat = true;
+    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
+      FirstConversion == PPCISD::FCTIWUZ;
+    EVT SrcVT = FirstInput.getOperand(0).getValueType();
+    SmallVector<SDValue, 4> Ops;
+    EVT TargetVT = N->getValueType(0);
+    for (int i = 0, e = N->getNumOperands(); i < e; i++) {
+      if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
+        return SDValue();
+      unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
+      if (NextConversion != FirstConversion)
+        return SDValue();
+      if (N->getOperand(i) != FirstInput)
+        IsSplat = false;
+    }
+
+    // If this is a splat, we leave it as-is since there will be only a single
+    // fp-to-int conversion followed by a splat of the integer. This is better
+    // for 32-bit and smaller ints and neutral for 64-bit ints.
+    if (IsSplat)
+      return SDValue();
+
+    // Now that we know we have the right type of node, get its operands.
+    for (int i = 0, e = N->getNumOperands(); i < e; i++) {
+      SDValue In = N->getOperand(i).getOperand(0);
+      // For 32-bit values, we need to add an FP_ROUND node.
+      if (Is32Bit) {
+        if (In.isUndef())
+          Ops.push_back(DAG.getUNDEF(SrcVT));
+        else {
+          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
+                                      MVT::f32, In.getOperand(0),
+                                      DAG.getIntPtrConstant(1, dl));
+          Ops.push_back(Trunc);
+        }
+      } else
+        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
+    }
+
+    unsigned Opcode;
+    if (FirstConversion == PPCISD::FCTIDZ ||
+        FirstConversion == PPCISD::FCTIWZ)
+      Opcode = ISD::FP_TO_SINT;
+    else
+      Opcode = ISD::FP_TO_UINT;
+
+    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
+    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
+    SDValue Ret = DAG.getNode(Opcode, dl, TargetVT, BV);
+    return Ret;
+  }
+  return SDValue();
+}
+
+static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+
+  SDLoc dl(N);
+  bool InputsAreConsecutiveLoads = true;
+  bool InputsAreReverseConsecutive = true;
+  unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
+  SDValue FirstInput = N->getOperand(0);
+  bool IsRoundOfExtLoad = false;
+
+  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
+      FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
+    LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
+    IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
+  }
+  // Not a build vector of (possibly fp_rounded) loads.
+  if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
+    return SDValue();
+
+  for (int i = 0, e = N->getNumOperands(); i < e; i++) {
+    // If any inputs are fp_round(extload), they all must be.
+    if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
+      return SDValue();
+
+    SDValue NextInput = IsRoundOfExtLoad ?
+      N->getOperand(i).getOperand(0) : N->getOperand(i);
+    if (NextInput.getOpcode() != ISD::LOAD)
+      return SDValue();
+
+    if (i == 0)
+      continue;
+
+    SDValue PreviousInput =
+      IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
+    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
+    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
+
+    // If any inputs are fp_round(extload), they all must be.
+    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
+      return SDValue();
+
+    if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
+      InputsAreConsecutiveLoads = false;
+    if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
+      InputsAreReverseConsecutive = false;
+
+    // Exit early if the loads are neither consecutive nor reverse consecutive.
+    if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
+      return SDValue();
+  }
+
+  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
+         "The loads cannot be both consecutive and reverse consecutive.");
+
+  SDValue FirstLoadOp =
+    IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
+  SDValue LastLoadOp =
+    IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
+                       N->getOperand(N->getNumOperands()-1);
+
+  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
+  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
+  if (InputsAreConsecutiveLoads) {
+    assert(LD1 && "Input needs to be a LoadSDNode.");
+    return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
+                       LD1->getBasePtr(), LD1->getPointerInfo(),
+                       LD1->getAlignment());
+  }
+  if (InputsAreReverseConsecutive) {
+    assert(LDL && "Input needs to be a LoadSDNode.");
+    SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
+                               LDL->getBasePtr(), LDL->getPointerInfo(),
+                               LDL->getAlignment());
+    std::vector<int> Ops;
+    for (int i = N->getNumOperands() - 1; i >= 0; i--)
+      Ops.push_back(i);
+
+    return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
+                                DAG.getUNDEF(N->getValueType(0)), Ops);
+  }
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -10512,21 +10703,41 @@
 
   SelectionDAG &DAG = DCI.DAG;
   SDLoc dl(N);
-  if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX())
+
+  if (!Subtarget.hasVSX())
+    return SDValue();
+
+  // The target-independent DAG combiner will leave a build_vector of
+  // fp-to-int conversions intact. We can generate much better code for
+  // a fp-to-int conversion of a vector of floats.
+  SDValue FirstInput = N->getOperand(0);
+  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
+    SDValue Reduced = combineBVOfFpToIntToFpToIntOfBV(N, DCI);
+    if (Reduced)
+      return Reduced;
+  }
+
+  // If we're building a vector out of consecutive loads, just load that
+  // vector type.
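+  // If the loads are instead consecutive in reverse order, the same helper
+  // produces a single load followed by a shuffle that reverses the elements.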
+  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
+  if (Reduced)
+    return Reduced;
+
+  if (N->getValueType(0) != MVT::v2f64)
     return SDValue();
 
   // Looking for:
   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
-  if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP &&
-      N->getOperand(0).getOpcode() != ISD::UINT_TO_FP)
+  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
+      FirstInput.getOpcode() != ISD::UINT_TO_FP)
     return SDValue();
   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
     return SDValue();
-  if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
+  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
     return SDValue();
 
-  SDValue Ext1 = N->getOperand(0).getOperand(0);
+  SDValue Ext1 = FirstInput.getOperand(0);
   SDValue Ext2 = N->getOperand(1).getOperand(0);
   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
Index: lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.td
+++ lib/Target/PowerPC/PPCInstrInfo.td
@@ -327,6 +327,7 @@
   return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
 }], LO16>;
 def immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
+def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
 
 // imm16Shifted* - These match immediates where the low 16-bits are zero.  There
 // are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -570,18 +570,38 @@
                         (outs vsfrc:$XT), (ins vsfrc:$XB),
                         "xscvdpsxds $XT, $XB", IIC_VecFP,
                         [(set f64:$XT, (PPCfctidz f64:$XB))]>;
+  let isCodeGenOnly = 1 in
+  def XSCVDPSXDSs : XX2Form<60, 344,
+                        (outs vssrc:$XT), (ins vssrc:$XB),
+                        "xscvdpsxds $XT, $XB", IIC_VecFP,
+                        [(set f32:$XT, (PPCfctidz f32:$XB))]>;
   def XSCVDPSXWS : XX2Form<60, 88,
                        (outs vsfrc:$XT), (ins vsfrc:$XB),
                        "xscvdpsxws $XT, $XB", IIC_VecFP,
                        [(set f64:$XT, (PPCfctiwz f64:$XB))]>;
+  let isCodeGenOnly = 1 in
+  def XSCVDPSXWSs : XX2Form<60, 88,
+                       (outs vssrc:$XT), (ins vssrc:$XB),
+                       "xscvdpsxws $XT, $XB", IIC_VecFP,
+                       [(set f32:$XT, (PPCfctiwz f32:$XB))]>;
   def XSCVDPUXDS : XX2Form<60, 328,
                        (outs vsfrc:$XT), (ins vsfrc:$XB),
                        "xscvdpuxds $XT, $XB", IIC_VecFP,
                        [(set f64:$XT, (PPCfctiduz f64:$XB))]>;
+  let isCodeGenOnly = 1 in
+  def XSCVDPUXDSs : XX2Form<60, 328,
+                       (outs vssrc:$XT), (ins vssrc:$XB),
+                       "xscvdpuxds $XT, $XB", IIC_VecFP,
+                       [(set f32:$XT, (PPCfctiduz f32:$XB))]>;
   def XSCVDPUXWS : XX2Form<60, 72,
                        (outs vsfrc:$XT), (ins vsfrc:$XB),
                        "xscvdpuxws $XT, $XB", IIC_VecFP,
                        [(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
+  let isCodeGenOnly = 1 in
+  def XSCVDPUXWSs : XX2Form<60, 72,
+                       (outs vssrc:$XT), (ins vssrc:$XB),
+                       "xscvdpuxws $XT, $XB", IIC_VecFP,
+                       [(set f32:$XT, (PPCfctiwuz f32:$XB))]>;
   def XSCVSPDP : XX2Form<60, 329,
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xscvspdp $XT, $XB", IIC_VecFP, []>;
@@ -620,13 +640,15 @@
                       "xvcvspsxds $XT, $XB", IIC_VecFP, []>;
   def XVCVSPSXWS : XX2Form<60, 152,
                       (outs vsrc:$XT), (ins vsrc:$XB),
-                      "xvcvspsxws $XT, $XB", IIC_VecFP, []>;
+                      "xvcvspsxws $XT, $XB", IIC_VecFP,
+                      [(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>;
   def XVCVSPUXDS : XX2Form<60, 392,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvcvspuxds $XT, $XB", IIC_VecFP, []>;
   def XVCVSPUXWS : XX2Form<60, 136,
                       (outs vsrc:$XT), (ins vsrc:$XB),
-                      "xvcvspuxws $XT, $XB", IIC_VecFP, []>;
+                      "xvcvspuxws $XT, $XB", IIC_VecFP,
+ [(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>; def XVCVSXDDP : XX2Form<60, 504, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvsxddp $XT, $XB", IIC_VecFP, @@ -653,7 +675,8 @@ "xvcvuxwdp $XT, $XB", IIC_VecFP, []>; def XVCVUXWSP : XX2Form<60, 168, (outs vsrc:$XT), (ins vsrc:$XB), - "xvcvuxwsp $XT, $XB", IIC_VecFP, []>; + "xvcvuxwsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>; // Rounding Instructions def XSRDPI : XX2Form<60, 73, @@ -1178,6 +1201,8 @@ def : Pat<(f64 (extloadf32 xoaddr:$src)), (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; + def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))), + (f32 (LXSSPX xoaddr:$src))>; def : Pat<(f64 (fpextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; @@ -1355,7 +1380,7 @@ } // AddedComplexity = 400 } // HasP8Vector -let UseVSXReg = 1 in { +let UseVSXReg = 1, AddedComplexity = 400 in { let Predicates = [HasDirectMove] in { // VSX direct move instructions def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), @@ -1701,6 +1726,7 @@ dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); } +let AddedComplexity = 400 in { // v4f32 scalar <-> vector conversions (BE) let Predicates = [IsBigEndian, HasP8Vector] in { def : Pat<(v4f32 (scalar_to_vector f32:$A)), @@ -1939,15 +1965,16 @@ (f64 (MTVSRD $S))>; } +// Materialize a zero-vector of long long +def : Pat<(v2i64 immAllZerosV), + (v2i64 (XXLXORz))>; +} + def AlignValues { dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3)); dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC); } -// Materialize a zero-vector of long long -def : Pat<(v2i64 immAllZerosV), - (v2i64 (XXLXORz))>; - // The following VSX instructions were introduced in Power ISA 3.0 def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">; let AddedComplexity = 400, Predicates = [HasP9Vector] in { @@ -2407,23 +2434,8 @@ (v4i32 (LXVWSX xoaddr:$src))>; def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), (v4f32 (LXVWSX xoaddr:$src))>; - def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), - (v4i32 (MTVSRWS $A))>; - def : Pat<(v16i8 (build_vector immSExt8:$A, immSExt8:$A, immSExt8:$A, - immSExt8:$A, immSExt8:$A, immSExt8:$A, - immSExt8:$A, immSExt8:$A, immSExt8:$A, - immSExt8:$A, immSExt8:$A, immSExt8:$A, - immSExt8:$A, immSExt8:$A, immSExt8:$A, - immSExt8:$A)), - (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; - def : Pat<(v16i8 immAllOnesV), - (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; - def : Pat<(v8i16 immAllOnesV), - (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; - def : Pat<(v4i32 immAllOnesV), - (v4i32 (XXSPLTIB 255))>; - def : Pat<(v2i64 immAllOnesV), - (v2i64 (XXSPLTIB 255))>; + def : Pat<(v4f32 (scalar_to_vector (f32 (fpround (extloadf32 xoaddr:$src))))), + (v4f32 (LXVWSX xoaddr:$src))>; // Build vectors from i8 loads def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), @@ -2564,6 +2576,7 @@ (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>; def : Pat<(f64 (PPCVexts f64:$A, 2)), (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; + let isPseudo = 1 in { def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src), "#DFLOADf32", @@ -2580,18 +2593,261 @@ } def : Pat<(f64 (extloadf32 iaddr:$src)), (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>; + def : Pat<(f32 (fpround (extloadf32 iaddr:$src))), + (f32 (DFLOADf32 iaddr:$src))>; } // end HasP9Vector, AddedComplexity -let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in { -def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)), - (v2i64 (MTVSRDD $rB, $rA))>; -def : Pat<(i64 
(extractelt v2i64:$A, 0)), - (i64 (MFVSRLD $A))>; +// Integer extend helper dags 32 -> 64 +def AnyExts { + dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32); + dag B = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $B, sub_32); + dag C = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $C, sub_32); + dag D = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $D, sub_32); } -let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in { -def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)), - (v2i64 (MTVSRDD $rB, $rA))>; -def : Pat<(i64 (extractelt v2i64:$A, 1)), - (i64 (MFVSRLD $A))>; +def DblToFlt { + dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0)))); + dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1)))); + dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0)))); + dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); +} +def FltToIntLoad { + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A))))); +} +def FltToUIntLoad { + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 xoaddr:$A))))); +} +def FltToLongLoad { + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); +} +def FltToULongLoad { + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); +} +def FltToLong { + dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A)))); +} +def FltToULong { + dag A = (i64 (PPCmfvsr (PPCfctiduz (fpextend f32:$A)))); +} +def DblToInt { + dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A)))); +} +def DblToUInt { + dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A)))); +} +def DblToLong { + dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A)))); +} +def DblToULong { + dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A)))); +} +def DblToIntLoad { + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); +} +def DblToUIntLoad { + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); +} +def DblToLongLoad { + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); +} +def DblToULongLoad { + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A))))); +} + +// FP merge dags (for f32 -> v4f32) +def MrgFP { + dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $C, VSRC), 0)); + dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), + (COPY_TO_REGCLASS $D, VSRC), 0)); + dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0)); + dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3)); + dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0)); + dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3)); +} + +// Patterns for BUILD_VECTOR nodes. +def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; +let AddedComplexity = 400 in { + + let Predicates = [HasVSX] in { + // Build vectors of floating point converted to i32. 
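+    // For example, a splat of (int)a_double selects to a single XSCVDPSXWS
+    // followed by an XXSPLTW of the converted word (the first pattern below).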
+ def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A, + DblToInt.A, DblToInt.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>; + def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A, + DblToUInt.A, DblToUInt.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>; + def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)), + (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), + (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>; + def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)), + (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), + (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; + def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPSXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>; + def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPUXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>; + def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), + (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; + + // Build vectors of floating point converted to i64. + def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), + (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>; + def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)), + (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>; + def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)), + (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>; + def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)), + (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>; + } + + let Predicates = [HasVSX, NoP9Vector] in { + // Load-and-splat with fp-to-int conversion (using X-Form VSX loads). + def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPSXWS (LXSDX xoaddr:$A)), VSRC), 1))>; + def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPUXWS (LXSDX xoaddr:$A)), VSRC), 1))>; + def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), + (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS + (LXSSPX xoaddr:$A), VSFRC)), 0))>; + def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), + (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS + (LXSSPX xoaddr:$A), VSFRC)), 0))>; + } + + // Big endian, available on all targets with VSX + let Predicates = [IsBigEndian, HasVSX] in { + def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), + (v2f64 (XXPERMDI + (COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $B, VSRC), 0))>; + + def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)), + (VMRGEW MrgFP.AC, MrgFP.BD)>; + def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, + DblToFlt.B0, DblToFlt.B1)), + (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>; + } + + let Predicates = [IsLittleEndian, HasVSX] in { + // Little endian, available on all targets with VSX + def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), + (v2f64 (XXPERMDI + (COPY_TO_REGCLASS $B, VSRC), + (COPY_TO_REGCLASS $A, VSRC), 0))>; + + def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)), + (VMRGEW MrgFP.AC, MrgFP.BD)>; + def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, + DblToFlt.B0, DblToFlt.B1)), + (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>; + } + + let Predicates = [HasDirectMove] in { + /* Endianness-neutral constant splat on P8 and newer targets. The reason + for this pattern is that on targets with direct moves, we don't expand + BUILD_VECTOR nodes for v4i32. 
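+     A splat of a small non-zero signed immediate is still cheapest as a
+     single VSPLTISW, which is what the pattern below selects.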
+ */ + def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A, + immSExt5NonZero:$A, immSExt5NonZero:$A)), + (v4i32 (VSPLTISW imm:$A))>; + } + + let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in { + // Big endian integer vectors using direct moves. + def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), + (v2i64 (XXPERMDI + (COPY_TO_REGCLASS (MTVSRD $A), VSRC), + (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), + (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), + (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0), + (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), + (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), + (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; + } + + let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in { + // Little endian integer vectors using direct moves. + def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), + (v2i64 (XXPERMDI + (COPY_TO_REGCLASS (MTVSRD $B), VSRC), + (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), + (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), + (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0), + (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), + (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), + (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; + } + + let Predicates = [HasP9Vector] in { + // Endianness-neutral patterns for const splats with ISA 3.0 instructions. + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 (MTVSRWS $A))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), + (v4i32 (MTVSRWS $A))>; + def : Pat<(v16i8 (build_vector immSExt8:$A, immSExt8:$A, immSExt8:$A, + immSExt8:$A, immSExt8:$A, immSExt8:$A, + immSExt8:$A, immSExt8:$A, immSExt8:$A, + immSExt8:$A, immSExt8:$A, immSExt8:$A, + immSExt8:$A, immSExt8:$A, immSExt8:$A, + immSExt8:$A)), + (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; + def : Pat<(v16i8 immAllOnesV), + (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; + def : Pat<(v8i16 immAllOnesV), + (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; + def : Pat<(v4i32 immAllOnesV), + (v4i32 (XXSPLTIB 255))>; + def : Pat<(v2i64 immAllOnesV), + (v2i64 (XXSPLTIB 255))>; + def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), + (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; + def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), + (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; + def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; + def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; + def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), + (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS + (DFLOADf32 iaddr:$A), + VSFRC)), 0))>; + def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), + (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS + (DFLOADf32 iaddr:$A), + VSFRC)), 0))>; + } + + let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in { + def : Pat<(i64 (extractelt v2i64:$A, 1)), + (i64 (MFVSRLD $A))>; + // Better way to build integer vectors if we have MTVSRDD. Big endian. 
+ def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)), + (v2i64 (MTVSRDD $rB, $rA))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), + (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC), + (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC))>; + } + + let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in { + def : Pat<(i64 (extractelt v2i64:$A, 0)), + (i64 (MFVSRLD $A))>; + // Better way to build integer vectors if we have MTVSRDD. Little endian. + def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)), + (v2i64 (MTVSRDD $rB, $rA))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), + (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC), + (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>; + } } Index: lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== --- lib/Target/PowerPC/PPCMIPeephole.cpp +++ lib/Target/PowerPC/PPCMIPeephole.cpp @@ -125,6 +125,37 @@ && TargetRegisterInfo::isVirtualRegister(TrueReg1)) { MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); + // If this is a splat fed by a splatting load, the splat is + // redundant. Remove it. This doesn't happen directly due to + // code in PPCDAGToDAGISel.cpp, but it can happen when converting + // a load of a double to a vector of 64-bit integers. + auto isConversionOfLoadAndSplat = [=]() -> bool { + if (DefMI->getOpcode() != PPC::XVCVDPSXDS && + DefMI->getOpcode() != PPC::XVCVDPUXDS) + return false; + unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg()); + if (TargetRegisterInfo::isVirtualRegister(DefReg)) { + MachineInstr *LoadMI = MRI->getVRegDef(DefReg); + if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) + return true; + } + return false; + }; + if (DefMI && (Immed == 0 || Immed == 3)) { + if (DefMI->getOpcode() == PPC::LXVDSX || + isConversionOfLoadAndSplat()) { + DEBUG(dbgs() + << "Optimizing load-and-splat/splat " + "to load-and-splat/copy: "); + DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), + TII->get(PPC::COPY), MI.getOperand(0).getReg()) + .addOperand(MI.getOperand(1)); + ToErase = &MI; + Simplified = true; + } + } + // If this is a splat or a swap fed by another splat, we // can replace it with a copy. if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) { @@ -190,14 +221,29 @@ unsigned MyOpcode = MI.getOpcode(); unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 
1 : 2;
       unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
+      if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
+        break;
       MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
       if (!DefMI)
         break;
       unsigned DefOpcode = DefMI->getOpcode();
+      auto isConvertOfSplat = [=]() -> bool {
+        if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
+          return false;
+        unsigned ConvReg = DefMI->getOperand(1).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
+          return false;
+        MachineInstr *Splt = MRI->getVRegDef(ConvReg);
+        return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
+          Splt->getOpcode() == PPC::XXSPLTW);
+      };
       bool SameOpcode = (MyOpcode == DefOpcode) ||
         (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
         (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
-        (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs);
+        (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
+        (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
+        (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS) ||
+        (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
       // Splat fed by another splat - switch the output of the first
       // and remove the second.
       if (SameOpcode) {
@@ -234,6 +280,69 @@
       }
       break;
     }
+    case PPC::XVCVDPSP: {
+      // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
+      unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
+      if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
+        break;
+      MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
+
+      // This can occur when building a vector of single precision or integer
+      // values.
+      if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
+        unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
+        unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
+        if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
+            !TargetRegisterInfo::isVirtualRegister(DefsReg2))
+          break;
+        MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
+        MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
+        if (P1 && P2 &&
+            (P1->getOpcode() == PPC::FRSP || P2->getOpcode() == PPC::FRSP)) {
+          Simplified = true;
+          // Look at all the uses of the registers that the FRSPs define.
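+          // Rewrite each such use to read the FRSP's input register directly;
+          // the FRSP can then be erased.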
+          if (P1->getOpcode() == PPC::FRSP &&
+              MRI->hasOneNonDBGUse(P1->getOperand(0).getReg())) {
+            unsigned ConvReg1 = P1->getOperand(1).getReg();
+            unsigned FRSP1Defines = P1->getOperand(0).getReg();
+            for (MachineInstr &I : MRI->use_instructions(FRSP1Defines)) {
+              for (int i = 0, e = I.getNumOperands(); i < e; i++)
+                if (I.getOperand(i).isReg() &&
+                    I.getOperand(i).getReg() == FRSP1Defines)
+                  I.getOperand(i).setReg(ConvReg1);
+            }
+            DEBUG(dbgs() << "Removing redundant FRSP:\n");
+            DEBUG(P1->dump());
+            DEBUG(dbgs() << "As it feeds instruction:\n");
+            DEBUG(MI.dump());
+            DEBUG(dbgs() << "Through instruction:\n");
+            DEBUG(DefMI->dump());
+            P1->eraseFromParent();
+          }
+          if (P1 == P2)
+            break;
+          if (P2->getOpcode() == PPC::FRSP &&
+              MRI->hasOneNonDBGUse(P2->getOperand(0).getReg())) {
+            unsigned ConvReg2 = P2->getOperand(1).getReg();
+            unsigned FRSP2Defines = P2->getOperand(0).getReg();
+            for (MachineInstr &I : MRI->use_instructions(FRSP2Defines)) {
+              for (int i = 0, e = I.getNumOperands(); i < e; i++)
+                if (I.getOperand(i).isReg() &&
+                    I.getOperand(i).getReg() == FRSP2Defines)
+                  I.getOperand(i).setReg(ConvReg2);
+            }
+            DEBUG(dbgs() << "Removing redundant FRSP:\n");
+            DEBUG(P2->dump());
+            DEBUG(dbgs() << "As it feeds instruction:\n");
+            DEBUG(MI.dump());
+            DEBUG(dbgs() << "Through instruction:\n");
+            DEBUG(DefMI->dump());
+            P2->eraseFromParent();
+          }
+        }
+      }
+      break;
+    }
     }
   }
Index: test/CodeGen/PowerPC/build-vector-tests.ll
===================================================================
--- test/CodeGen/PowerPC/build-vector-tests.ll
+++ test/CodeGen/PowerPC/build-vector-tests.ll
@@ -0,0 +1,4858 @@
+; RUN: llc -mcpu=pwr9 -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \
+; RUN:   < %s | FileCheck %s -check-prefix=P9BE -implicit-check-not frsp
+; RUN: llc -mcpu=pwr9 -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \
+; RUN:   < %s | FileCheck %s -check-prefix=P9LE -implicit-check-not frsp
+; RUN: llc -mcpu=pwr8 -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \
+; RUN:   < %s | FileCheck %s -check-prefix=P8BE -implicit-check-not frsp
+; RUN: llc -mcpu=pwr8 -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \
+; RUN:   < %s | FileCheck %s -check-prefix=P8LE -implicit-check-not frsp
+
+; This test case comes from the following C test case (included as it may be
+; slightly more readable than the LLVM IR).
+
+;/* This test case provides various ways of building vectors to ensure we
+;   produce optimal code for all cases.
The cases are (for each type): +; - All zeros +; - All ones +; - Splat of a constant +; - From different values already in registers +; - From different constants +; - From different values in memory +; - Splat of a value in register +; - Splat of a value in memory +; - Inserting element into existing vector +; - Inserting element from existing vector into existing vector +; +; With conversions (float <-> int) +; - Splat of a constant +; - From different values already in registers +; - From different constants +; - From different values in memory +; - Splat of a value in register +; - Splat of a value in memory +; - Inserting element into existing vector +; - Inserting element from existing vector into existing vector +;*/ +; +;/*=================================== int ===================================*/ +;// P8: xxlxor // +;// P9: xxlxor // +;vector int allZeroi() { // +; return (vector int)0; // +;} // +;// P8: vspltisb -1 // +;// P9: xxspltisb 255 // +;vector int allOnei() { // +; return (vector int)-1; // +;} // +;// P8: vspltisw 1 // +;// P9: vspltisw 1 // +;vector int spltConst1i() { // +; return (vector int)1; // +;} // +;// P8: vspltisw -15; vsrw // +;// P9: vspltisw -15; vsrw // +;vector int spltConst16ki() { // +; return (vector int)((1<<15) - 1); // +;} // +;// P8: vspltisw -16; vsrw // +;// P9: vspltisw -16; vsrw // +;vector int spltConst32ki() { // +; return (vector int)((1<<16) - 1); // +;} // +;// P8: 4 x mtvsrwz, 2 x xxmrgh, vmrgow // +;// P9: 2 x mtvsrdd, vmrgow // +;vector int fromRegsi(int a, int b, int c, int d) { // +; return (vector int){ a, b, c, d }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx (or even lxv) // +;vector int fromDiffConstsi() { // +; return (vector int) { 242, -113, 889, 19 }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx // +;vector int fromDiffMemConsAi(int *arr) { // +; return (vector int) { arr[0], arr[1], arr[2], arr[3] }; // +;} // +;// P8: 2 x lxvd2x, 2 x xxswapd, vperm // +;// P9: 2 x lxvx, vperm // +;vector int fromDiffMemConsDi(int *arr) { // +; return (vector int) { arr[3], arr[2], arr[1], arr[0] }; // +;} // +;// P8: sldi 2, lxvd2x, xxswapd // +;// P9: sldi 2, lxvx // +;vector int fromDiffMemVarAi(int *arr, int elem) { // +; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // +;} // +;// P8: sldi 2, 2 x lxvd2x, 2 x xxswapd, vperm // +;// P9: sldi 2, 2 x lxvx, vperm // +;vector int fromDiffMemVarDi(int *arr, int elem) { // +; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; // +;} // +;// P8: 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow // +;// P9: 4 x lwz, 2 x mtvsrdd, vmrgow // +;vector int fromRandMemConsi(int *arr) { // +; return (vector int) { arr[4], arr[18], arr[2], arr[88] }; // +;} // +;// P8: sldi 2, 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow // +;// P9: sldi 2, add, 4 x lwz, 2 x mtvsrdd, vmrgow // +;vector int fromRandMemVari(int *arr, int elem) { // +; return (vector int) { arr[elem+4], arr[elem+1], arr[elem+2], arr[elem+8] };// +;} // +;// P8: mtvsrwz, xxspltw // +;// P9: mtvsrws // +;vector int spltRegVali(int val) { // +; return (vector int) val; // +;} // +;// P8: lxsiwax, xxspltw // +;// P9: lxvwsx // +;vector int spltMemVali(int *ptr) { // +; return (vector int)*ptr; // +;} // +;// P8: vspltisw // +;// P9: vspltisw // +;vector int spltCnstConvftoi() { // +; return (vector int) 4.74f; // +;} // +;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvdpsxws // +;vector int fromRegsConvftoi(float a, float b, float 
c, float d) { // +; return (vector int) { a, b, c, d }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx (even lxv) // +;vector int fromDiffConstsConvftoi() { // +; return (vector int) { 24.46f, 234.f, 988.19f, 422.39f }; // +;} // +;// P8: lxvd2x, xxswapd, xvcvspsxws // +;// P9: lxvx, xvcvspsxws // +;vector int fromDiffMemConsAConvftoi(float *ptr) { // +; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // +;} // +;// P8: 2 x lxvd2x, 2 x xxswapd, vperm, xvcvspsxws // +;// P9: 2 x lxvx, vperm, xvcvspsxws // +;vector int fromDiffMemConsDConvftoi(float *ptr) { // +; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // +;} // +;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// Note: if the consecutive loads learns to handle pre-inc, this can be: // +;// sldi 2, load, xvcvspuxws // +;vector int fromDiffMemVarAConvftoi(float *arr, int elem) { // +; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // +;} // +;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// Note: if the consecutive loads learns to handle pre-inc, this can be: // +;// sldi 2, 2 x load, vperm, xvcvspuxws // +;vector int fromDiffMemVarDConvftoi(float *arr, int elem) { // +; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; // +;} // +;// P8: xscvdpsxws, xxspltw // +;// P9: xscvdpsxws, xxspltw // +;vector int spltRegValConvftoi(float val) { // +; return (vector int) val; // +;} // +;// P8: lxsspx, xscvdpsxws, xxspltw // +;// P9: lxvwsx, xvcvspsxws // +;vector int spltMemValConvftoi(float *ptr) { // +; return (vector int)*ptr; // +;} // +;// P8: vspltisw // +;// P9: vspltisw // +;vector int spltCnstConvdtoi() { // +; return (vector int) 4.74; // +;} // +;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;vector int fromRegsConvdtoi(double a, double b, double c, double d) { // +; return (vector int) { a, b, c, d }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx (even lxv) // +;vector int fromDiffConstsConvdtoi() { // +; return (vector int) { 24.46, 234., 988.19, 422.39 }; // +;} // +;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, // +;// xvcvspsxws // +;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, // +;// xvcvspsxws // +;vector int fromDiffMemConsAConvdtoi(double *ptr) { // +; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // +;} // +;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;vector int fromDiffMemConsDConvdtoi(double *ptr) { // +; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // +;} // +;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;vector int fromDiffMemVarAConvdtoi(double *arr, int elem) { // +; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // +;} // +;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;vector int fromDiffMemVarDConvdtoi(double *arr, int elem) { // +; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; // +;} // +;// P8: xscvdpsxws, xxspltw // +;// P9: xscvdpsxws, xxspltw // +;vector int 
spltRegValConvdtoi(double val) { // +; return (vector int) val; // +;} // +;// P8: lxsdx, xscvdpsxws, xxspltw // +;// P9: lxssp, xscvdpsxws, xxspltw // +;vector int spltMemValConvdtoi(double *ptr) { // +; return (vector int)*ptr; // +;} // +;/*=================================== int ===================================*/ +;/*=============================== unsigned int ==============================*/ +;// P8: xxlxor // +;// P9: xxlxor // +;vector unsigned int allZeroui() { // +; return (vector unsigned int)0; // +;} // +;// P8: vspltisb -1 // +;// P9: xxspltisb 255 // +;vector unsigned int allOneui() { // +; return (vector unsigned int)-1; // +;} // +;// P8: vspltisw 1 // +;// P9: vspltisw 1 // +;vector unsigned int spltConst1ui() { // +; return (vector unsigned int)1; // +;} // +;// P8: vspltisw -15; vsrw // +;// P9: vspltisw -15; vsrw // +;vector unsigned int spltConst16kui() { // +; return (vector unsigned int)((1<<15) - 1); // +;} // +;// P8: vspltisw -16; vsrw // +;// P9: vspltisw -16; vsrw // +;vector unsigned int spltConst32kui() { // +; return (vector unsigned int)((1<<16) - 1); // +;} // +;// P8: 4 x mtvsrwz, 2 x xxmrghd, vmrgow // +;// P9: 2 x mtvsrdd, vmrgow // +;vector unsigned int fromRegsui(unsigned int a, unsigned int b, // +; unsigned int c, unsigned int d) { // +; return (vector unsigned int){ a, b, c, d }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx (or even lxv) // +;vector unsigned int fromDiffConstsui() { // +; return (vector unsigned int) { 242, -113, 889, 19 }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx // +;vector unsigned int fromDiffMemConsAui(unsigned int *arr) { // +; return (vector unsigned int) { arr[0], arr[1], arr[2], arr[3] }; // +;} // +;// P8: 2 x lxvd2x, 2 x xxswapd, vperm // +;// P9: 2 x lxvx, vperm // +;vector unsigned int fromDiffMemConsDui(unsigned int *arr) { // +; return (vector unsigned int) { arr[3], arr[2], arr[1], arr[0] }; // +;} // +;// P8: sldi 2, lxvd2x, xxswapd // +;// P9: sldi 2, lxvx // +;vector unsigned int fromDiffMemVarAui(unsigned int *arr, int elem) { // +; return (vector unsigned int) { arr[elem], arr[elem+1], // +; arr[elem+2], arr[elem+3] }; // +;} // +;// P8: sldi 2, 2 x lxvd2x, 2 x xxswapd, vperm // +;// P9: sldi 2, 2 x lxvx, vperm // +;vector unsigned int fromDiffMemVarDui(unsigned int *arr, int elem) { // +; return (vector unsigned int) { arr[elem], arr[elem-1], // +; arr[elem-2], arr[elem-3] }; // +;} // +;// P8: 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow // +;// P9: 4 x lwz, 2 x mtvsrdd, vmrgow // +;vector unsigned int fromRandMemConsui(unsigned int *arr) { // +; return (vector unsigned int) { arr[4], arr[18], arr[2], arr[88] }; // +;} // +;// P8: sldi 2, 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow // +;// P9: sldi 2, add, 4 x lwz, 2 x mtvsrdd, vmrgow // +;vector unsigned int fromRandMemVarui(unsigned int *arr, int elem) { // +; return (vector unsigned int) { arr[elem+4], arr[elem+1], // +; arr[elem+2], arr[elem+8] }; // +;} // +;// P8: mtvsrwz, xxspltw // +;// P9: mtvsrws // +;vector unsigned int spltRegValui(unsigned int val) { // +; return (vector unsigned int) val; // +;} // +;// P8: lxsiwax, xxspltw // +;// P9: lxvwsx // +;vector unsigned int spltMemValui(unsigned int *ptr) { // +; return (vector unsigned int)*ptr; // +;} // +;// P8: vspltisw // +;// P9: vspltisw // +;vector unsigned int spltCnstConvftoui() { // +; return (vector unsigned int) 4.74f; // +;} // +;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;vector unsigned int 
fromRegsConvftoui(float a, float b, float c, float d) { // +; return (vector unsigned int) { a, b, c, d }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx (even lxv) // +;vector unsigned int fromDiffConstsConvftoui() { // +; return (vector unsigned int) { 24.46f, 234.f, 988.19f, 422.39f }; // +;} // +;// P8: lxvd2x, xxswapd, xvcvspuxws // +;// P9: lxvx, xvcvspuxws // +;vector unsigned int fromDiffMemConsAConvftoui(float *ptr) { // +; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // +;} // +;// P8: 2 x lxvd2x, 2 x xxswapd, vperm, xvcvspuxws // +;// P9: 2 x lxvx, vperm, xvcvspuxws // +;vector unsigned int fromDiffMemConsDConvftoui(float *ptr) { // +; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // +;} // +;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// Note: if the consecutive loads learns to handle pre-inc, this can be: // +;// sldi 2, load, xvcvspuxws // +;vector unsigned int fromDiffMemVarAConvftoui(float *arr, int elem) { // +; return (vector unsigned int) { arr[elem], arr[elem+1], // +; arr[elem+2], arr[elem+3] }; // +;} // +;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// Note: if the consecutive loads learns to handle pre-inc, this can be: // +;// sldi 2, 2 x load, vperm, xvcvspuxws // +;vector unsigned int fromDiffMemVarDConvftoui(float *arr, int elem) { // +; return (vector unsigned int) { arr[elem], arr[elem-1], // +; arr[elem-2], arr[elem-3] }; // +;} // +;// P8: xscvdpuxws, xxspltw // +;// P9: xscvdpuxws, xxspltw // +;vector unsigned int spltRegValConvftoui(float val) { // +; return (vector unsigned int) val; // +;} // +;// P8: lxsspx, xscvdpuxws, xxspltw // +;// P9: lxvwsx, xvcvspuxws // +;vector unsigned int spltMemValConvftoui(float *ptr) { // +; return (vector unsigned int)*ptr; // +;} // +;// P8: vspltisw // +;// P9: vspltisw // +;vector unsigned int spltCnstConvdtoui() { // +; return (vector unsigned int) 4.74; // +;} // +;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;vector unsigned int fromRegsConvdtoui(double a, double b, // +; double c, double d) { // +; return (vector unsigned int) { a, b, c, d }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx (even lxv) // +;vector unsigned int fromDiffConstsConvdtoui() { // +; return (vector unsigned int) { 24.46, 234., 988.19, 422.39 }; // +;} // +;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, // +;// xvcvspuxws // +;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;vector unsigned int fromDiffMemConsAConvdtoui(double *ptr) { // +; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // +;} // +;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;vector unsigned int fromDiffMemConsDConvdtoui(double *ptr) { // +; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // +;} // +;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;vector unsigned int fromDiffMemVarAConvdtoui(double *arr, int elem) { // +; return (vector unsigned int) { arr[elem], arr[elem+1], // +; arr[elem+2], arr[elem+3] }; // +;} // +;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, 
vmrgew, xvcvspuxws // +;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;vector unsigned int fromDiffMemVarDConvdtoui(double *arr, int elem) { // +; return (vector unsigned int) { arr[elem], arr[elem-1], // +; arr[elem-2], arr[elem-3] }; // +;} // +;// P8: xscvdpuxws, xxspltw // +;// P9: xscvdpuxws, xxspltw // +;vector unsigned int spltRegValConvdtoui(double val) { // +; return (vector unsigned int) val; // +;} // +;// P8: lxsspx, xscvdpuxws, xxspltw // +;// P9: lfd, xscvdpuxws, xxspltw // +;vector unsigned int spltMemValConvdtoui(double *ptr) { // +; return (vector unsigned int)*ptr; // +;} // +;/*=============================== unsigned int ==============================*/ +;/*=============================== long long =================================*/ +;// P8: xxlxor // +;// P9: xxlxor // +;vector long long allZeroll() { // +; return (vector long long)0; // +;} // +;// P8: vspltisb -1 // +;// P9: xxspltisb 255 // +;vector long long allOnell() { // +; return (vector long long)-1; // +;} // +;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // +;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // +;vector long long spltConst1ll() { // +; return (vector long long)1; // +;} // +;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // +;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // +;vector long long spltConst16kll() { // +; return (vector long long)((1<<15) - 1); // +;} // +;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // +;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // +;vector long long spltConst32kll() { // +; return (vector long long)((1<<16) - 1); // +;} // +;// P8: 2 x mtvsrd, xxmrghd // +;// P9: mtvsrdd // +;vector long long fromRegsll(long long a, long long b) { // +; return (vector long long){ a, b }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx (or even lxv) // +;vector long long fromDiffConstsll() { // +; return (vector long long) { 242, -113 }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx // +;vector long long fromDiffMemConsAll(long long *arr) { // +; return (vector long long) { arr[0], arr[1] }; // +;} // +;// P8: lxvd2x // +;// P9: lxvx, xxswapd (maybe just use lxvd2x) // +;vector long long fromDiffMemConsDll(long long *arr) { // +; return (vector long long) { arr[3], arr[2] }; // +;} // +;// P8: sldi 3, lxvd2x, xxswapd // +;// P9: sldi 3, lxvx // +;vector long long fromDiffMemVarAll(long long *arr, int elem) { // +; return (vector long long) { arr[elem], arr[elem+1] }; // +;} // +;// P8: sldi 3, lxvd2x // +;// P9: sldi 3, lxvx, xxswapd (maybe just use lxvd2x) // +;vector long long fromDiffMemVarDll(long long *arr, int elem) { // +; return (vector long long) { arr[elem], arr[elem-1] }; // +;} // +;// P8: 2 x ld, 2 x mtvsrd, xxmrghd // +;// P9: 2 x ld, mtvsrdd // +;vector long long fromRandMemConsll(long long *arr) { // +; return (vector long long) { arr[4], arr[18] }; // +;} // +;// P8: sldi 3, add, 2 x ld, 2 x mtvsrd, xxmrghd // +;// P9: sldi 3, add, 2 x ld, mtvsrdd // +;vector long long fromRandMemVarll(long long *arr, int elem) { // +; return (vector long long) { arr[elem+4], arr[elem+1] }; // +;} // +;// P8: mtvsrd, xxspltd // +;// P9: mtvsrdd // +;vector long long spltRegValll(long long val) { // +; return (vector long long) val; // +;} // +;// P8: lxvdsx // +;// P9: lxvdsx // +;vector long long spltMemValll(long long *ptr) { // +; return (vector long long)*ptr; // +;} // +;// P8: constant pool 
load (possible: vmrgew (xxlxor), (vspltisw)) // +;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // +;vector long long spltCnstConvftoll() { // +; return (vector long long) 4.74f; // +;} // +;// P8: xxmrghd, xvcvdpsxds // +;// P9: xxmrghd, xvcvdpsxds // +;vector long long fromRegsConvftoll(float a, float b) { // +; return (vector long long) { a, b }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx (even lxv) // +;vector long long fromDiffConstsConvftoll() { // +; return (vector long long) { 24.46f, 234.f }; // +;} // +;// P8: 2 x lxsspx, xxmrghd, xvcvdpsxds // +;// P9: 2 x lxssp, xxmrghd, xvcvdpsxds // +;vector long long fromDiffMemConsAConvftoll(float *ptr) { // +; return (vector long long) { ptr[0], ptr[1] }; // +;} // +;// P8: 2 x lxsspx, xxmrghd, xvcvdpsxds // +;// P9: 2 x lxssp, xxmrghd, xvcvdpsxds // +;vector long long fromDiffMemConsDConvftoll(float *ptr) { // +; return (vector long long) { ptr[3], ptr[2] }; // +;} // +;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpsxds // +;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpsxds // +;vector long long fromDiffMemVarAConvftoll(float *arr, int elem) { // +; return (vector long long) { arr[elem], arr[elem+1] }; // +;} // +;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpsxds // +;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpsxds // +;vector long long fromDiffMemVarDConvftoll(float *arr, int elem) { // +; return (vector long long) { arr[elem], arr[elem-1] }; // +;} // +;// P8: xscvdpsxds, xxspltd // +;// P9: xscvdpsxds, xxspltd // +;vector long long spltRegValConvftoll(float val) { // +; return (vector long long) val; // +;} // +;// P8: lxsspx, xscvdpsxds, xxspltd // +;// P9: lfs, xscvdpsxds, xxspltd // +;vector long long spltMemValConvftoll(float *ptr) { // +; return (vector long long)*ptr; // +;} // +;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // +;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // +;vector long long spltCnstConvdtoll() { // +; return (vector long long) 4.74; // +;} // +;// P8: xxmrghd, xvcvdpsxds // +;// P9: xxmrghd, xvcvdpsxds // +;vector long long fromRegsConvdtoll(double a, double b) { // +; return (vector long long) { a, b }; // +;} // +;// P8: lxvd2x, xxswapd // +;// P9: lxvx (even lxv) // +;vector long long fromDiffConstsConvdtoll() { // +; return (vector long long) { 24.46, 234. 
}; //
+;} //
+;// P8: lxvd2x, xxswapd, xvcvdpsxds //
+;// P9: lxvx, xvcvdpsxds //
+;vector long long fromDiffMemConsAConvdtoll(double *ptr) { //
+; return (vector long long) { ptr[0], ptr[1] }; //
+;} //
+;// P8: lxvd2x, xvcvdpsxds //
+;// P9: lxvx, xxswapd, xvcvdpsxds //
+;vector long long fromDiffMemConsDConvdtoll(double *ptr) { //
+; return (vector long long) { ptr[3], ptr[2] }; //
+;} //
+;// P8: sldi 3, lxvd2x, xxswapd, xvcvdpsxds //
+;// P9: sldi 3, lxvx, xvcvdpsxds //
+;vector long long fromDiffMemVarAConvdtoll(double *arr, int elem) { //
+; return (vector long long) { arr[elem], arr[elem+1] }; //
+;} //
+;// P8: sldi 3, lxvd2x, xvcvdpsxds //
+;// P9: sldi 3, lxvx, xxswapd, xvcvdpsxds //
+;vector long long fromDiffMemVarDConvdtoll(double *arr, int elem) { //
+; return (vector long long) { arr[elem], arr[elem-1] }; //
+;} //
+;// P8: xscvdpsxds, xxspltd //
+;// P9: xscvdpsxds, xxspltd //
+;vector long long spltRegValConvdtoll(double val) { //
+; return (vector long long) val; //
+;} //
+;// P8: lxvdsx, xvcvdpsxds //
+;// P9: lxvdsx, xvcvdpsxds //
+;vector long long spltMemValConvdtoll(double *ptr) { //
+; return (vector long long)*ptr; //
+;} //
+;/*=============================== long long =================================*/
+;/*========================== unsigned long long =============================*/
+;// P8: xxlxor //
+;// P9: xxlxor //
+;vector unsigned long long allZeroull() { //
+; return (vector unsigned long long)0; //
+;} //
+;// P8: vspltisb -1 //
+;// P9: xxspltib 255 //
+;vector unsigned long long allOneull() { //
+; return (vector unsigned long long)-1; //
+;} //
+;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
+;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
+;vector unsigned long long spltConst1ull() { //
+; return (vector unsigned long long)1; //
+;} //
+;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
+;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
+;vector unsigned long long spltConst16kull() { //
+; return (vector unsigned long long)((1<<15) - 1); //
+;} //
+;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
+;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
+;vector unsigned long long spltConst32kull() { //
+; return (vector unsigned long long)((1<<16) - 1); //
+;} //
+;// P8: 2 x mtvsrd, xxmrghd //
+;// P9: mtvsrdd //
+;vector unsigned long long fromRegsull(unsigned long long a, //
+; unsigned long long b) { //
+; return (vector unsigned long long){ a, b }; //
+;} //
+;// P8: lxvd2x, xxswapd //
+;// P9: lxvx (or even lxv) //
+;vector unsigned long long fromDiffConstsull() { //
+; return (vector unsigned long long) { 242, -113 }; //
+;} //
+;// P8: lxvd2x, xxswapd //
+;// P9: lxvx //
+;vector unsigned long long fromDiffMemConsAull(unsigned long long *arr) { //
+; return (vector unsigned long long) { arr[0], arr[1] }; //
+;} //
+;// P8: lxvd2x //
+;// P9: lxvx, xxswapd (maybe just use lxvd2x) //
+;vector unsigned long long fromDiffMemConsDull(unsigned long long *arr) { //
+; return (vector unsigned long long) { arr[3], arr[2] }; //
+;} //
+;// P8: sldi 3, lxvd2x, xxswapd //
+;// P9: sldi 3, lxvx //
+;vector unsigned long long fromDiffMemVarAull(unsigned long long *arr, //
+; int elem) { //
+; return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
+;} //
+;// P8: sldi 3, lxvd2x //
+;// P9: sldi 3, lxvx, xxswapd (maybe just use lxvd2x) //
+;vector unsigned long long fromDiffMemVarDull(unsigned long long *arr, //
+; int elem) { //
+; return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
+;} //
+;// P8: 2 x ld, 2 x mtvsrd, xxmrghd //
+;// P9: 2 x ld, mtvsrdd //
+;vector unsigned long long fromRandMemConsull(unsigned long long *arr) { //
+; return (vector unsigned long long) { arr[4], arr[18] }; //
+;} //
+;// P8: sldi 3, add, 2 x ld, 2 x mtvsrd, xxmrghd //
+;// P9: sldi 3, add, 2 x ld, mtvsrdd //
+;vector unsigned long long fromRandMemVarull(unsigned long long *arr, //
+; int elem) { //
+; return (vector unsigned long long) { arr[elem+4], arr[elem+1] }; //
+;} //
+;// P8: mtvsrd, xxspltd //
+;// P9: mtvsrdd //
+;vector unsigned long long spltRegValull(unsigned long long val) { //
+; return (vector unsigned long long) val; //
+;} //
+;// P8: lxvdsx //
+;// P9: lxvdsx //
+;vector unsigned long long spltMemValull(unsigned long long *ptr) { //
+; return (vector unsigned long long)*ptr; //
+;} //
+;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
+;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
+;vector unsigned long long spltCnstConvftoull() { //
+; return (vector unsigned long long) 4.74f; //
+;} //
+;// P8: xxmrghd, xvcvdpuxds //
+;// P9: xxmrghd, xvcvdpuxds //
+;vector unsigned long long fromRegsConvftoull(float a, float b) { //
+; return (vector unsigned long long) { a, b }; //
+;} //
+;// P8: lxvd2x, xxswapd //
+;// P9: lxvx (even lxv) //
+;vector unsigned long long fromDiffConstsConvftoull() { //
+; return (vector unsigned long long) { 24.46f, 234.f }; //
+;} //
+;// P8: 2 x lxsspx, xxmrghd, xvcvdpuxds //
+;// P9: 2 x lxssp, xxmrghd, xvcvdpuxds //
+;vector unsigned long long fromDiffMemConsAConvftoull(float *ptr) { //
+; return (vector unsigned long long) { ptr[0], ptr[1] }; //
+;} //
+;// P8: 2 x lxsspx, xxmrghd, xvcvdpuxds //
+;// P9: 2 x lxssp, xxmrghd, xvcvdpuxds //
+;vector unsigned long long fromDiffMemConsDConvftoull(float *ptr) { //
+; return (vector unsigned long long) { ptr[3], ptr[2] }; //
+;} //
+;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpuxds //
+;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpuxds //
+;vector unsigned long long fromDiffMemVarAConvftoull(float *arr, int elem) { //
+; return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
+;} //
+;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpuxds //
+;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpuxds //
+;vector unsigned long long fromDiffMemVarDConvftoull(float *arr, int elem) { //
+; return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
+;} //
+;// P8: xscvdpuxds, xxspltd //
+;// P9: xscvdpuxds, xxspltd //
+;vector unsigned long long spltRegValConvftoull(float val) { //
+; return (vector unsigned long long) val; //
+;} //
+;// P8: lxsspx, xscvdpuxds, xxspltd //
+;// P9: lfs, xscvdpuxds, xxspltd //
+;vector unsigned long long spltMemValConvftoull(float *ptr) { //
+; return (vector unsigned long long)*ptr; //
+;} //
+;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
+;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
+;vector unsigned long long spltCnstConvdtoull() { //
+; return (vector unsigned long long) 4.74; //
+;} //
+;// P8: xxmrghd, xvcvdpuxds //
+;// P9: xxmrghd, xvcvdpuxds //
+;vector unsigned long long fromRegsConvdtoull(double a, double b) { //
+; return (vector unsigned long long) { a, b }; //
+;} //
+;// P8: lxvd2x, xxswapd //
+;// P9: lxvx (even lxv) //
+;vector unsigned long long fromDiffConstsConvdtoull() { //
+; return (vector unsigned long long) { 24.46, 234. }; //
+;} //
+;// P8: lxvd2x, xxswapd, xvcvdpuxds //
+;// P9: lxvx, xvcvdpuxds //
+;vector unsigned long long fromDiffMemConsAConvdtoull(double *ptr) { //
+; return (vector unsigned long long) { ptr[0], ptr[1] }; //
+;} //
+;// P8: lxvd2x, xvcvdpuxds //
+;// P9: lxvx, xxswapd, xvcvdpuxds //
+;vector unsigned long long fromDiffMemConsDConvdtoull(double *ptr) { //
+; return (vector unsigned long long) { ptr[3], ptr[2] }; //
+;} //
+;// P8: sldi 3, lxvd2x, xxswapd, xvcvdpuxds //
+;// P9: sldi 3, lxvx, xvcvdpuxds //
+;vector unsigned long long fromDiffMemVarAConvdtoull(double *arr, int elem) { //
+; return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
+;} //
+;// P8: sldi 3, lxvd2x, xvcvdpuxds //
+;// P9: sldi 3, lxvx, xxswapd, xvcvdpuxds //
+;vector unsigned long long fromDiffMemVarDConvdtoull(double *arr, int elem) { //
+; return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
+;} //
+;// P8: xscvdpuxds, xxspltd //
+;// P9: xscvdpuxds, xxspltd //
+;vector unsigned long long spltRegValConvdtoull(double val) { //
+; return (vector unsigned long long) val; //
+;} //
+;// P8: lxvdsx, xvcvdpuxds //
+;// P9: lxvdsx, xvcvdpuxds //
+;vector unsigned long long spltMemValConvdtoull(double *ptr) { //
+; return (vector unsigned long long)*ptr; //
+;} //
+;/*========================== unsigned long long ==============================*/
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @allZeroi() {
+entry:
+  ret <4 x i32> zeroinitializer
+; P9BE-LABEL: allZeroi
+; P9LE-LABEL: allZeroi
+; P8BE-LABEL: allZeroi
+; P8LE-LABEL: allZeroi
+; P9BE: xxlxor v2, v2, v2
+; P9BE: blr
+; P9LE: xxlxor v2, v2, v2
+; P9LE: blr
+; P8BE: xxlxor v2, v2, v2
+; P8BE: blr
+; P8LE: xxlxor v2, v2, v2
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @allOnei() {
+entry:
+  ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; P9BE-LABEL: allOnei
+; P9LE-LABEL: allOnei
+; P8BE-LABEL: allOnei
+; P8LE-LABEL: allOnei
+; P9BE: xxspltib v2, 255
+; P9BE: blr
+; P9LE: xxspltib v2, 255
+; P9LE: blr
+; P8BE: vspltisb v2, -1
+; P8BE: blr
+; P8LE: vspltisb v2, -1
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltConst1i() {
+entry:
+  ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; P9BE-LABEL: spltConst1i
+; P9LE-LABEL: spltConst1i
+; P8BE-LABEL: spltConst1i
+; P8LE-LABEL: spltConst1i
+; P9BE: vspltisw v2, 1
+; P9BE: blr
+; P9LE: vspltisw v2, 1
+; P9LE: blr
+; P8BE: vspltisw v2, 1
+; P8BE: blr
+; P8LE: vspltisw v2, 1
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltConst16ki() {
+entry:
+  ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+; P9BE-LABEL: spltConst16ki
+; P9LE-LABEL: spltConst16ki
+; P8BE-LABEL: spltConst16ki
+; P8LE-LABEL: spltConst16ki
+; P9BE: vspltisw v2, -15
+; P9BE: vsrw v2, v2, v2
+; P9BE: blr
+; P9LE: vspltisw v2, -15
+; P9LE: vsrw v2, v2, v2
+; P9LE: blr
+; P8BE: vspltisw v2, -15
+; P8BE: vsrw v2, v2, v2
+; P8BE: blr
+; P8LE: vspltisw v2, -15
+; P8LE: vsrw v2, v2, v2
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltConst32ki() {
+entry:
+  ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+; P9BE-LABEL: spltConst32ki
+; P9LE-LABEL: spltConst32ki
+; P8BE-LABEL: spltConst32ki
+; P8LE-LABEL: spltConst32ki
+; P9BE: vspltisw v2, -16
+; P9BE: vsrw v2, v2, v2
+; P9BE: blr
+; P9LE: vspltisw v2, -16
+; P9LE: vsrw v2, v2, v2
+; P9LE: blr
+; P8BE: vspltisw v2, -16
+; P8BE: vsrw v2, v2, v2
+; P8BE: blr
+; P8LE: vspltisw v2, -16
+; P8LE: vsrw v2, v2, v2
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @fromRegsi(i32 signext %a, i32 signext %b, i32
signext %c, i32 signext %d) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3 + ret <4 x i32> %vecinit3 +; P9BE-LABEL: fromRegsi +; P9LE-LABEL: fromRegsi +; P8BE-LABEL: fromRegsi +; P8LE-LABEL: fromRegsi +; P9BE-DAG: mtvsrdd [[REG1:v[0-9]+]], r3, r5 +; P9BE-DAG: mtvsrdd [[REG2:v[0-9]+]], r4, r6 +; P9BE: vmrgow v2, [[REG1]], [[REG2]] +; P9BE: blr +; P9LE-DAG: mtvsrdd [[REG1:v[0-9]+]], r5, r3 +; P9LE-DAG: mtvsrdd [[REG2:v[0-9]+]], r6, r4 +; P9LE: vmrgow v2, [[REG2]], [[REG1]] +; P9LE: blr +; P8BE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3 +; P8BE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4 +; P8BE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5 +; P8BE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6 +; P8BE-DAG: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG1]], {{[v][s]*}}[[REG3]] +; P8BE-DAG: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG2]], {{[v][s]*}}[[REG4]] +; P8BE: vmrgow v2, [[REG5]], [[REG6]] +; P8LE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3 +; P8LE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4 +; P8LE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5 +; P8LE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6 +; P8LE: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG3]], {{[v][s]*}}[[REG1]] +; P8LE: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG4]], {{[v][s]*}}[[REG2]] +; P8LE: vmrgow v2, [[REG6]], [[REG5]] +} + +; Function Attrs: norecurse nounwind readnone +define <4 x i32> @fromDiffConstsi() { +entry: + ret <4 x i32> +; P9BE-LABEL: fromDiffConstsi +; P9LE-LABEL: fromDiffConstsi +; P8BE-LABEL: fromDiffConstsi +; P8LE-LABEL: fromDiffConstsi +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvw4x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemConsAi(i32* nocapture readonly %arr) { +entry: + %0 = load i32, i32* %arr, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 1 + %1 = load i32, i32* %arrayidx1, align 4 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2 + %2 = load i32, i32* %arrayidx3, align 4 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 + %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 3 + %3 = load i32, i32* %arrayidx5, align 4 + %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 + ret <4 x i32> %vecinit6 +; P9BE-LABEL: fromDiffMemConsAi +; P9LE-LABEL: fromDiffMemConsAi +; P8BE-LABEL: fromDiffMemConsAi +; P8LE-LABEL: fromDiffMemConsAi +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvw4x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemConsDi(i32* nocapture readonly %arr) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2 + %1 = load i32, i32* %arrayidx1, align 4 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 1 + %2 = load i32, i32* %arrayidx3, align 4 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 + %3 = load i32, i32* %arr, align 4 + %vecinit6 = insertelement <4 x i32> %vecinit4, 
i32 %3, i32 3 + ret <4 x i32> %vecinit6 +; P9BE-LABEL: fromDiffMemConsDi +; P9LE-LABEL: fromDiffMemConsDi +; P8BE-LABEL: fromDiffMemConsDi +; P8LE-LABEL: fromDiffMemConsDi +; P9BE: lxvx +; P9BE: lxvx +; P9BE: vperm +; P9BE: blr +; P9LE: lxvx +; P9LE: lxvx +; P9LE: vperm +; P9LE: blr +; P8BE: lxvw4x +; P8BE: lxvw4x +; P8BE: vperm +; P8BE: blr +; P8LE: lxvd2x +; P8LE-DAG: lxvd2x +; P8LE-DAG: xxswapd +; P8LE: xxswapd +; P8LE: vperm +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarAi(i32* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1 + %1 = load i32, i32* %arrayidx2, align 4 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %add4 = add nsw i32 %elem, 2 + %idxprom5 = sext i32 %add4 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5 + %2 = load i32, i32* %arrayidx6, align 4 + %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2 + %add8 = add nsw i32 %elem, 3 + %idxprom9 = sext i32 %add8 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9 + %3 = load i32, i32* %arrayidx10, align 4 + %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 + ret <4 x i32> %vecinit11 +; P9BE-LABEL: fromDiffMemVarAi +; P9LE-LABEL: fromDiffMemVarAi +; P8BE-LABEL: fromDiffMemVarAi +; P8LE-LABEL: fromDiffMemVarAi +; P9BE: sldi r4, r4, 2 +; P9BE: lxvx v2, r3, r4 +; P9BE: blr +; P9LE: sldi r4, r4, 2 +; P9LE: lxvx v2, r3, r4 +; P9LE: blr +; P8BE: sldi r4, r4, 2 +; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4 +; P8BE: blr +; P8LE: sldi r4, r4, 2 +; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4 +; P8LE: xxswapd +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarDi(i32* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1 + %1 = load i32, i32* %arrayidx2, align 4 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %sub4 = add nsw i32 %elem, -2 + %idxprom5 = sext i32 %sub4 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5 + %2 = load i32, i32* %arrayidx6, align 4 + %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2 + %sub8 = add nsw i32 %elem, -3 + %idxprom9 = sext i32 %sub8 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9 + %3 = load i32, i32* %arrayidx10, align 4 + %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 + ret <4 x i32> %vecinit11 +; P9BE-LABEL: fromDiffMemVarDi +; P9LE-LABEL: fromDiffMemVarDi +; P8BE-LABEL: fromDiffMemVarDi +; P8LE-LABEL: fromDiffMemVarDi +; P9BE: sldi r4, r4, 2 +; P9BE-DAG: lxvx {{[vs0-9]+}}, r3, r4 +; P9BE-DAG: lxvx +; P9BE: vperm +; P9BE: blr +; P9LE: sldi r4, r4, 2 +; P9LE-DAG: lxvx {{[vs0-9]+}}, r3, r4 +; P9LE-DAG: lxvx +; P9LE: vperm +; P9LE: blr +; P8BE: sldi r4, r4, 2 +; P8BE-DAG: lxvw4x {{[vs0-9]+}}, r3, r4 +; P8BE-DAG: lxvw4x +; P8BE: vperm +; P8BE: blr +; P8LE: sldi r4, r4, 2 +; P8LE-DAG: 
lxvd2x +; P8LE-DAG: lxvd2x +; P8LE-DAG: xxswapd +; P8LE: xxswapd +; P8LE: vperm +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromRandMemConsi(i32* nocapture readonly %arr) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4 + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 18 + %1 = load i32, i32* %arrayidx1, align 4 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2 + %2 = load i32, i32* %arrayidx3, align 4 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 + %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 88 + %3 = load i32, i32* %arrayidx5, align 4 + %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 + ret <4 x i32> %vecinit6 +; P9BE-LABEL: fromRandMemConsi +; P9LE-LABEL: fromRandMemConsi +; P8BE-LABEL: fromRandMemConsi +; P8LE-LABEL: fromRandMemConsi +; P9BE: lwz +; P9BE: lwz +; P9BE: lwz +; P9BE: lwz +; P9BE: mtvsrdd +; P9BE: mtvsrdd +; P9BE: vmrgow +; P9LE: lwz +; P9LE: lwz +; P9LE: lwz +; P9LE: lwz +; P9LE: mtvsrdd +; P9LE: mtvsrdd +; P9LE: vmrgow +; P8BE: lwz +; P8BE: lwz +; P8BE: lwz +; P8BE: lwz +; P8BE: mtvsrwz +; P8BE: mtvsrwz +; P8BE: mtvsrwz +; P8BE: mtvsrwz +; P8BE: xxmrghd +; P8BE: xxmrghd +; P8BE: vmrgow +; P8LE: lwz +; P8LE: lwz +; P8LE: lwz +; P8LE: lwz +; P8LE: mtvsrwz +; P8LE: mtvsrwz +; P8LE: mtvsrwz +; P8LE: mtvsrwz +; P8LE: xxmrghd +; P8LE: xxmrghd +; P8LE: vmrgow +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromRandMemVari(i32* nocapture readonly %arr, i32 signext %elem) { +entry: + %add = add nsw i32 %elem, 4 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %add1 = add nsw i32 %elem, 1 + %idxprom2 = sext i32 %add1 to i64 + %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 %idxprom2 + %1 = load i32, i32* %arrayidx3, align 4 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %add5 = add nsw i32 %elem, 2 + %idxprom6 = sext i32 %add5 to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 %idxprom6 + %2 = load i32, i32* %arrayidx7, align 4 + %vecinit8 = insertelement <4 x i32> %vecinit4, i32 %2, i32 2 + %add9 = add nsw i32 %elem, 8 + %idxprom10 = sext i32 %add9 to i64 + %arrayidx11 = getelementptr inbounds i32, i32* %arr, i64 %idxprom10 + %3 = load i32, i32* %arrayidx11, align 4 + %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3 + ret <4 x i32> %vecinit12 +; P9BE-LABEL: fromRandMemVari +; P9LE-LABEL: fromRandMemVari +; P8BE-LABEL: fromRandMemVari +; P8LE-LABEL: fromRandMemVari +; P9BE: sldi r4, r4, 2 +; P9BE: lwz +; P9BE: lwz +; P9BE: lwz +; P9BE: lwz +; P9BE: mtvsrdd +; P9BE: mtvsrdd +; P9BE: vmrgow +; P9LE: sldi r4, r4, 2 +; P9LE: lwz +; P9LE: lwz +; P9LE: lwz +; P9LE: lwz +; P9LE: mtvsrdd +; P9LE: mtvsrdd +; P9LE: vmrgow +; P8BE: sldi r4, r4, 2 +; P8BE: lwz +; P8BE: lwz +; P8BE: lwz +; P8BE: lwz +; P8BE: mtvsrwz +; P8BE: mtvsrwz +; P8BE: mtvsrwz +; P8BE: mtvsrwz +; P8BE: xxmrghd +; P8BE: xxmrghd +; P8BE: vmrgow +; P8LE: sldi r4, r4, 2 +; P8LE: lwz +; P8LE: lwz +; P8LE: lwz +; P8LE: lwz +; P8LE: mtvsrwz +; P8LE: mtvsrwz +; P8LE: mtvsrwz +; P8LE: mtvsrwz +; P8LE: xxmrghd +; P8LE: xxmrghd +; P8LE: vmrgow +} + +; Function Attrs: norecurse nounwind readnone +define <4 x i32> @spltRegVali(i32 
signext %val) {
+entry:
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; P9BE-LABEL: spltRegVali
+; P9LE-LABEL: spltRegVali
+; P8BE-LABEL: spltRegVali
+; P8LE-LABEL: spltRegVali
+; P9BE: mtvsrws v2, r3
+; P9BE: blr
+; P9LE: mtvsrws v2, r3
+; P9LE: blr
+; P8BE: mtvsrwz {{[vsf0-9]+}}, r3
+; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P8BE: blr
+; P8LE: mtvsrwz {{[vsf0-9]+}}, r3
+; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {
+entry:
+  %0 = load i32, i32* %ptr, align 4
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; P9BE-LABEL: spltMemVali
+; P9LE-LABEL: spltMemVali
+; P8BE-LABEL: spltMemVali
+; P8LE-LABEL: spltMemVali
+; P9BE: lxvwsx v2, 0, r3
+; P9BE: blr
+; P9LE: lxvwsx v2, 0, r3
+; P9LE: blr
+; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
+; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P8BE: blr
+; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
+; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltCnstConvftoi() {
+entry:
+  ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+; P9BE-LABEL: spltCnstConvftoi
+; P9LE-LABEL: spltCnstConvftoi
+; P8BE-LABEL: spltCnstConvftoi
+; P8LE-LABEL: spltCnstConvftoi
+; P9BE: vspltisw v2, 4
+; P9BE: blr
+; P9LE: vspltisw v2, 4
+; P9LE: blr
+; P8BE: vspltisw v2, 4
+; P8BE: blr
+; P8LE: vspltisw v2, 4
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
+entry:
+  %conv = fptosi float %a to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %conv1 = fptosi float %b to i32
+  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
+  %conv3 = fptosi float %c to i32
+  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
+  %conv5 = fptosi float %d to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
+  ret <4 x i32> %vecinit6
+; P9BE-LABEL: fromRegsConvftoi
+; P9LE-LABEL: fromRegsConvftoi
+; P8BE-LABEL: fromRegsConvftoi
+; P8LE-LABEL: fromRegsConvftoi
+; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: vmrgew v2, [[REG3]], [[REG4]]
+; P9BE: xvcvspsxws v2, v2
+; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: vmrgew v2, [[REG4]], [[REG3]]
+; P9LE: xvcvspsxws v2, v2
+; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8BE: vmrgew v2, [[REG3]], [[REG4]]
+; P8BE: xvcvspsxws v2, v2
+; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8LE: vmrgew v2, [[REG4]], [[REG3]]
+; P8LE: xvcvspsxws
v2, v2 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x i32> @fromDiffConstsConvftoi() { +entry: + ret <4 x i32> +; P9BE-LABEL: fromDiffConstsConvftoi +; P9LE-LABEL: fromDiffConstsConvftoi +; P8BE-LABEL: fromDiffConstsConvftoi +; P8LE-LABEL: fromDiffConstsConvftoi +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvw4x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemConsAConvftoi(float* nocapture readonly %ptr) { +entry: + %0 = bitcast float* %ptr to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = fptosi <4 x float> %1 to <4 x i32> + ret <4 x i32> %2 +; P9BE-LABEL: fromDiffMemConsAConvftoi +; P9LE-LABEL: fromDiffMemConsAConvftoi +; P8BE-LABEL: fromDiffMemConsAConvftoi +; P8LE-LABEL: fromDiffMemConsAConvftoi +; P9BE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9BE: xvcvspsxws v2, [[REG1]] +; P9BE: blr +; P9LE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9LE: xvcvspsxws v2, [[REG1]] +; P9LE: blr +; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3 +; P8BE: xvcvspsxws v2, [[REG1]] +; P8BE: blr +; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 +; P8LE: xxswapd v2, [[REG1]] +; P8LE: xvcvspsxws v2, v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemConsDConvftoi(float* nocapture readonly %ptr) { +entry: + %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 + %0 = load float, float* %arrayidx, align 4 + %conv = fptosi float %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2 + %1 = load float, float* %arrayidx1, align 4 + %conv2 = fptosi float %1 to i32 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 + %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 1 + %2 = load float, float* %arrayidx4, align 4 + %conv5 = fptosi float %2 to i32 + %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 + %3 = load float, float* %ptr, align 4 + %conv8 = fptosi float %3 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 + ret <4 x i32> %vecinit9 +; P9BE-LABEL: fromDiffMemConsDConvftoi +; P9LE-LABEL: fromDiffMemConsDConvftoi +; P8BE-LABEL: fromDiffMemConsDConvftoi +; P8LE-LABEL: fromDiffMemConsDConvftoi +; P9BE: lxvx +; P9BE: lxvx +; P9BE: vperm +; P9BE: xvcvspsxws +; P9BE: blr +; P9LE: lxvx +; P9LE: lxvx +; P9LE: vperm +; P9LE: xvcvspsxws +; P9LE: blr +; P8BE: lxvw4x +; P8BE: lxvw4x +; P8BE: vperm +; P8BE: xvcvspsxws +; P8BE: blr +; P8LE: lxvd2x +; P8LE-DAG: lxvd2x +; P8LE-DAG: xxswapd +; P8LE: xxswapd +; P8LE: vperm +; P8LE: xvcvspsxws +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarAConvftoi(float* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fptosi float %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 + %1 = load float, float* %arrayidx2, align 4 + %conv3 = fptosi float %1 to i32 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 + %add5 = add nsw i32 %elem, 2 + %idxprom6 = sext i32 %add5 to i64 + %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6 + %2 = load float, float* %arrayidx7, align 4 + %conv8 = 
fptosi float %2 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 + %add10 = add nsw i32 %elem, 3 + %idxprom11 = sext i32 %add10 to i64 + %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11 + %3 = load float, float* %arrayidx12, align 4 + %conv13 = fptosi float %3 to i32 + %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 + ret <4 x i32> %vecinit14 +; P9BE-LABEL: fromDiffMemVarAConvftoi +; P9LE-LABEL: fromDiffMemVarAConvftoi +; P8BE-LABEL: fromDiffMemVarAConvftoi +; P8LE-LABEL: fromDiffMemVarAConvftoi +; FIXME: implement finding consecutive loads with pre-inc +; P9BE: lfsux +; P9LE: lfsux +; P8BE: lfsux +; P8LE: lfsux +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarDConvftoi(float* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fptosi float %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 + %1 = load float, float* %arrayidx2, align 4 + %conv3 = fptosi float %1 to i32 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 + %sub5 = add nsw i32 %elem, -2 + %idxprom6 = sext i32 %sub5 to i64 + %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6 + %2 = load float, float* %arrayidx7, align 4 + %conv8 = fptosi float %2 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 + %sub10 = add nsw i32 %elem, -3 + %idxprom11 = sext i32 %sub10 to i64 + %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11 + %3 = load float, float* %arrayidx12, align 4 + %conv13 = fptosi float %3 to i32 + %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 + ret <4 x i32> %vecinit14 +; P9BE-LABEL: fromDiffMemVarDConvftoi +; P9LE-LABEL: fromDiffMemVarDConvftoi +; P8BE-LABEL: fromDiffMemVarDConvftoi +; P8LE-LABEL: fromDiffMemVarDConvftoi +; FIXME: implement finding consecutive loads with pre-inc +; P9BE: lfsux +; P9LE: lfsux +; P8BE: lfsux +; P8LE: lfsux +} + +; Function Attrs: norecurse nounwind readnone +define <4 x i32> @spltRegValConvftoi(float %val) { +entry: + %conv = fptosi float %val to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; P9BE-LABEL: spltRegValConvftoi +; P9LE-LABEL: spltRegValConvftoi +; P8BE-LABEL: spltRegValConvftoi +; P8LE-LABEL: spltRegValConvftoi +; P9BE: xscvdpsxws f[[REG1:[0-9]+]], f1 +; P9BE: xxspltw v2, vs[[REG1]], 1 +; P9BE: blr +; P9LE: xscvdpsxws f[[REG1:[0-9]+]], f1 +; P9LE: xxspltw v2, vs[[REG1]], 1 +; P9LE: blr +; P8BE: xscvdpsxws f[[REG1:[0-9]+]], f1 +; P8BE: xxspltw v2, vs[[REG1]], 1 +; P8BE: blr +; P8LE: xscvdpsxws f[[REG1:[0-9]+]], f1 +; P8LE: xxspltw v2, vs[[REG1]], 1 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @spltMemValConvftoi(float* nocapture readonly %ptr) { +entry: + %0 = load float, float* %ptr, align 4 + %conv = fptosi float %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; P9BE-LABEL: spltMemValConvftoi +; 
P9LE-LABEL: spltMemValConvftoi
+; P8BE-LABEL: spltMemValConvftoi
+; P8LE-LABEL: spltMemValConvftoi
+; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
+; P9BE: xvcvspsxws v2, [[REG1]]
+; P9LE: [[REG1:[vs0-9]+]], 0, r3
+; P9LE: xvcvspsxws v2, [[REG1]]
+; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]]
+; P8BE: xxspltw v2, vs[[REG2]], 1
+; P8LE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8LE: xscvdpsxws f[[REG2:[vs0-9]+]], [[REG1]]
+; P8LE: xxspltw v2, vs[[REG2]], 1
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltCnstConvdtoi() {
+entry:
+  ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+; P9BE-LABEL: spltCnstConvdtoi
+; P9LE-LABEL: spltCnstConvdtoi
+; P8BE-LABEL: spltCnstConvdtoi
+; P8LE-LABEL: spltCnstConvdtoi
+; P9BE: vspltisw v2, 4
+; P9BE: blr
+; P9LE: vspltisw v2, 4
+; P9LE: blr
+; P8BE: vspltisw v2, 4
+; P8BE: blr
+; P8LE: vspltisw v2, 4
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
+entry:
+  %conv = fptosi double %a to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %conv1 = fptosi double %b to i32
+  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
+  %conv3 = fptosi double %c to i32
+  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
+  %conv5 = fptosi double %d to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
+  ret <4 x i32> %vecinit6
+; P9BE-LABEL: fromRegsConvdtoi
+; P9LE-LABEL: fromRegsConvdtoi
+; P8BE-LABEL: fromRegsConvdtoi
+; P8LE-LABEL: fromRegsConvdtoi
+; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: vmrgew v2, [[REG3]], [[REG4]]
+; P9BE: xvcvspsxws v2, v2
+; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: vmrgew v2, [[REG4]], [[REG3]]
+; P9LE: xvcvspsxws v2, v2
+; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8BE: vmrgew v2, [[REG3]], [[REG4]]
+; P8BE: xvcvspsxws v2, v2
+; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8LE: vmrgew v2, [[REG4]], [[REG3]]
+; P8LE: xvcvspsxws v2, v2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @fromDiffConstsConvdtoi() {
+entry:
+  ret <4 x i32>
+; P9BE-LABEL: fromDiffConstsConvdtoi
+; P9LE-LABEL: fromDiffConstsConvdtoi
+; P8BE-LABEL: fromDiffConstsConvdtoi
+; P8LE-LABEL: fromDiffConstsConvdtoi
+; P9BE: lxvx
+; P9BE: blr
+; P9LE: lxvx
+; P9LE: blr
+; P8BE: lxvw4x
+; P8BE: blr
+; P8LE: lxvd2x
+; P8LE: xxswapd
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @fromDiffMemConsAConvdtoi(double* nocapture readonly %ptr) {
+entry:
+  %0 = bitcast double* %ptr to <2 x double>*
+  %1 = load <2 x double>, <2 x double>* %0, align 8
+  %2 = fptosi <2 x double> %1 to <2 x i32>
+  %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 2
+  %3 = bitcast double* %arrayidx4 to <2 x double>*
+  %4 = load <2 x double>, <2 x double>* %3, align 8
+  %5 = fptosi <2 x double> %4 to <2 x i32>
+  %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %vecinit9
+; P9BE-LABEL: fromDiffMemConsAConvdtoi
+; P9LE-LABEL: fromDiffMemConsAConvdtoi
+; P8BE-LABEL: fromDiffMemConsAConvdtoi
+; P8LE-LABEL: fromDiffMemConsAConvdtoi
+; P9BE: lxvx [[REG1:[vs0-9]+]], 0, r3
+; P9BE: lxvx [[REG2:[vs0-9]+]], r3, r4
+; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
+; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
+; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
+; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
+; P9BE: vmrgew v2, [[REG6]], [[REG5]]
+; P9BE: xvcvspsxws v2, v2
+; P9LE: lxvx [[REG1:[vs0-9]+]], 0, r3
+; P9LE: lxvx [[REG2:[vs0-9]+]], r3, r4
+; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
+; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
+; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
+; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
+; P9LE: vmrgew v2, [[REG6]], [[REG5]]
+; P9LE: xvcvspsxws v2, v2
+; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
+; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
+; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
+; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
+; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
+; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
+; P8BE: vmrgew v2, [[REG6]], [[REG5]]
+; P8BE: xvcvspsxws v2, v2
+; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
+; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
+; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
+; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
+; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
+; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
+; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]]
+; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]]
+; P8LE: vmrgew v2, [[REG8]], [[REG7]]
+; P8LE: xvcvspsxws v2, v2
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @fromDiffMemConsDConvdtoi(double* nocapture readonly %ptr) {
+entry:
+  %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
+  %0 = load double, double* %arrayidx, align 8
+  %conv = fptosi double %0 to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
+  %1 = load double, double* %arrayidx1, align 8
+  %conv2 = fptosi double %1 to i32
+  %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+  %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 1
+  %2 = load double, double* %arrayidx4, align 8
+  %conv5 = fptosi double %2 to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+  %3 = load double, double* %ptr, align 8
+  %conv8 = fptosi double %3 to i32
+  %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+  ret <4 x i32> %vecinit9
+; P9BE-LABEL: fromDiffMemConsDConvdtoi
+; P9LE-LABEL: fromDiffMemConsDConvdtoi
+; P8BE-LABEL: fromDiffMemConsDConvdtoi
+; P8LE-LABEL: fromDiffMemConsDConvdtoi
+; P9BE: lfd
+; P9BE: lfd
+; P9BE: lfd
+; P9BE: lfd
+; P9BE: xxmrghd
+; P9BE: xxmrghd
+; P9BE: xvcvdpsp
+; P9BE: xvcvdpsp
+; P9BE: vmrgew
+; P9BE: xvcvspsxws v2
+; P9LE: lfd
+; P9LE: lfd
+; P9LE: lfd
+; P9LE: lfd
+; P9LE: xxmrghd
+; P9LE: xxmrghd
+; P9LE: xvcvdpsp
+; P9LE: xvcvdpsp
+; P9LE: vmrgew
+; P9LE: xvcvspsxws v2
+; P8BE: lxsdx
+; P8BE: lxsdx
+; P8BE: lxsdx
+; P8BE: lxsdx
+; P8BE: xxmrghd
+; P8BE: xxmrghd
+; P8BE: xvcvdpsp
+; P8BE: xvcvdpsp
+; P8BE: vmrgew
+; P8BE: xvcvspsxws v2
+; P8LE: lxsdx
+; P8LE: lxsdx
+; P8LE: lxsdx
+; P8LE:
lxsdx +; P8LE: xxmrghd +; P8LE: xxmrghd +; P8LE: xvcvdpsp +; P8LE: xvcvdpsp +; P8LE: vmrgew +; P8LE: xvcvspsxws v2 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarAConvdtoi(double* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %conv = fptosi double %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 + %1 = load double, double* %arrayidx2, align 8 + %conv3 = fptosi double %1 to i32 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 + %add5 = add nsw i32 %elem, 2 + %idxprom6 = sext i32 %add5 to i64 + %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6 + %2 = load double, double* %arrayidx7, align 8 + %conv8 = fptosi double %2 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 + %add10 = add nsw i32 %elem, 3 + %idxprom11 = sext i32 %add10 to i64 + %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11 + %3 = load double, double* %arrayidx12, align 8 + %conv13 = fptosi double %3 to i32 + %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 + ret <4 x i32> %vecinit14 +; P9BE-LABEL: fromDiffMemVarAConvdtoi +; P9LE-LABEL: fromDiffMemVarAConvdtoi +; P8BE-LABEL: fromDiffMemVarAConvdtoi +; P8LE-LABEL: fromDiffMemVarAConvdtoi +; P9BE: lfdux +; P9BE: lfd +; P9BE: lfd +; P9BE: lfd +; P9BE: xxmrghd +; P9BE: xxmrghd +; P9BE: xvcvdpsp +; P9BE: xvcvdpsp +; P9BE: vmrgew +; P9BE: xvcvspsxws v2 +; P9LE: lfdux +; P9LE: lfd +; P9LE: lfd +; P9LE: lfd +; P9LE: xxmrghd +; P9LE: xxmrghd +; P9LE: xvcvdpsp +; P9LE: xvcvdpsp +; P9LE: vmrgew +; P9LE: xvcvspsxws v2 +; P8BE: lfdux +; P8BE: lxsdx +; P8BE: lxsdx +; P8BE: lxsdx +; P8BE: xxmrghd +; P8BE: xxmrghd +; P8BE: xvcvdpsp +; P8BE: xvcvdpsp +; P8BE: vmrgew +; P8BE: xvcvspsxws v2 +; P8LE: lfdux +; P8LE: lxsdx +; P8LE: lxsdx +; P8LE: lxsdx +; P8LE: xxmrghd +; P8LE: xxmrghd +; P8LE: xvcvdpsp +; P8LE: xvcvdpsp +; P8LE: vmrgew +; P8LE: xvcvspsxws v2 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarDConvdtoi(double* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %conv = fptosi double %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 + %1 = load double, double* %arrayidx2, align 8 + %conv3 = fptosi double %1 to i32 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 + %sub5 = add nsw i32 %elem, -2 + %idxprom6 = sext i32 %sub5 to i64 + %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6 + %2 = load double, double* %arrayidx7, align 8 + %conv8 = fptosi double %2 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 + %sub10 = add nsw i32 %elem, -3 + %idxprom11 = sext i32 %sub10 to i64 + %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11 + %3 = load double, double* %arrayidx12, align 8 + %conv13 = fptosi double %3 to i32 + %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 + ret <4 x i32> 
%vecinit14
+; P9BE-LABEL: fromDiffMemVarDConvdtoi
+; P9LE-LABEL: fromDiffMemVarDConvdtoi
+; P8BE-LABEL: fromDiffMemVarDConvdtoi
+; P8LE-LABEL: fromDiffMemVarDConvdtoi
+; P9BE: lfdux
+; P9BE: lfd
+; P9BE: lfd
+; P9BE: lfd
+; P9BE: xxmrghd
+; P9BE: xxmrghd
+; P9BE: xvcvdpsp
+; P9BE: xvcvdpsp
+; P9BE: vmrgew
+; P9BE: xvcvspsxws v2
+; P9LE: lfdux
+; P9LE: lfd
+; P9LE: lfd
+; P9LE: lfd
+; P9LE: xxmrghd
+; P9LE: xxmrghd
+; P9LE: xvcvdpsp
+; P9LE: xvcvdpsp
+; P9LE: vmrgew
+; P9LE: xvcvspsxws v2
+; P8BE: lfdux
+; P8BE: lxsdx
+; P8BE: lxsdx
+; P8BE: lxsdx
+; P8BE: xxmrghd
+; P8BE: xxmrghd
+; P8BE: xvcvdpsp
+; P8BE: xvcvdpsp
+; P8BE: vmrgew
+; P8BE: xvcvspsxws v2
+; P8LE: lfdux
+; P8LE: lxsdx
+; P8LE: lxsdx
+; P8LE: lxsdx
+; P8LE: xxmrghd
+; P8LE: xxmrghd
+; P8LE: xvcvdpsp
+; P8LE: xvcvdpsp
+; P8LE: vmrgew
+; P8LE: xvcvspsxws v2
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltRegValConvdtoi(double %val) {
+entry:
+  %conv = fptosi double %val to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; P9BE-LABEL: spltRegValConvdtoi
+; P9LE-LABEL: spltRegValConvdtoi
+; P8BE-LABEL: spltRegValConvdtoi
+; P8LE-LABEL: spltRegValConvdtoi
+; P9BE: xscvdpsxws
+; P9BE: xxspltw
+; P9BE: blr
+; P9LE: xscvdpsxws
+; P9LE: xxspltw
+; P9LE: blr
+; P8BE: xscvdpsxws
+; P8BE: xxspltw
+; P8BE: blr
+; P8LE: xscvdpsxws
+; P8LE: xxspltw
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @spltMemValConvdtoi(double* nocapture readonly %ptr) {
+entry:
+  %0 = load double, double* %ptr, align 8
+  %conv = fptosi double %0 to i32
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; P9BE-LABEL: spltMemValConvdtoi
+; P9LE-LABEL: spltMemValConvdtoi
+; P8BE-LABEL: spltMemValConvdtoi
+; P8LE-LABEL: spltMemValConvdtoi
+; P9BE: lfd
+; P9BE: xscvdpsxws
+; P9BE: xxspltw
+; P9BE: blr
+; P9LE: lfd
+; P9LE: xscvdpsxws
+; P9LE: xxspltw
+; P9LE: blr
+; P8BE: lxsdx
+; P8BE: xscvdpsxws
+; P8BE: xxspltw
+; P8BE: blr
+; P8LE: lxsdx
+; P8LE: xscvdpsxws
+; P8LE: xxspltw
+; P8LE: blr
+}
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @allZeroui() {
+entry:
+  ret <4 x i32> zeroinitializer
+; P9BE-LABEL: allZeroui
+; P9LE-LABEL: allZeroui
+; P8BE-LABEL: allZeroui
+; P8LE-LABEL: allZeroui
+; P9BE: xxlxor v2, v2, v2
+; P9BE: blr
+; P9LE: xxlxor v2, v2, v2
+; P9LE: blr
+; P8BE: xxlxor v2, v2, v2
+; P8BE: blr
+; P8LE: xxlxor v2, v2, v2
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @allOneui() {
+entry:
+  ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; P9BE-LABEL: allOneui
+; P9LE-LABEL: allOneui
+; P8BE-LABEL: allOneui
+; P8LE-LABEL: allOneui
+; P9BE: xxspltib v2, 255
+; P9BE: blr
+; P9LE: xxspltib v2, 255
+; P9LE: blr
+; P8BE: vspltisb v2, -1
+; P8BE: blr
+; P8LE: vspltisb v2, -1
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltConst1ui() {
+entry:
+  ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; P9BE-LABEL: spltConst1ui
+; P9LE-LABEL: spltConst1ui
+; P8BE-LABEL: spltConst1ui
+; P8LE-LABEL: spltConst1ui
+; P9BE: vspltisw v2, 1
+; P9BE: blr
+; P9LE: vspltisw v2, 1
+; P9LE: blr
+; P8BE: vspltisw v2, 1
+; P8BE: blr
+; P8LE: vspltisw v2, 1
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltConst16kui() {
+entry:
+  ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+; P9BE-LABEL: spltConst16kui
+; P9LE-LABEL: spltConst16kui
+; P8BE-LABEL: spltConst16kui
+; P8LE-LABEL: spltConst16kui
+; P9BE: vspltisw v2, -15
+; P9BE: vsrw v2, v2, v2
+; P9BE: blr
+; P9LE: vspltisw v2, -15
+; P9LE: vsrw v2, v2, v2
+; P9LE: blr
+; P8BE: vspltisw v2, -15
+; P8BE: vsrw v2, v2, v2
+; P8BE: blr
+; P8LE: vspltisw v2, -15
+; P8LE: vsrw v2, v2, v2
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltConst32kui() {
+entry:
+  ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+; P9BE-LABEL: spltConst32kui
+; P9LE-LABEL: spltConst32kui
+; P8BE-LABEL: spltConst32kui
+; P8LE-LABEL: spltConst32kui
+; P9BE: vspltisw v2, -16
+; P9BE: vsrw v2, v2, v2
+; P9BE: blr
+; P9LE: vspltisw v2, -16
+; P9LE: vsrw v2, v2, v2
+; P9LE: blr
+; P8BE: vspltisw v2, -16
+; P8BE: vsrw v2, v2, v2
+; P8BE: blr
+; P8LE: vspltisw v2, -16
+; P8LE: vsrw v2, v2, v2
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @fromRegsui(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c, i32 zeroext %d) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3
+  ret <4 x i32> %vecinit3
+; P9BE-LABEL: fromRegsui
+; P9LE-LABEL: fromRegsui
+; P8BE-LABEL: fromRegsui
+; P8LE-LABEL: fromRegsui
+; P9BE-DAG: mtvsrdd [[REG1:v[0-9]+]], r3, r5
+; P9BE-DAG: mtvsrdd [[REG2:v[0-9]+]], r4, r6
+; P9BE: vmrgow v2, [[REG1]], [[REG2]]
+; P9BE: blr
+; P9LE-DAG: mtvsrdd [[REG1:v[0-9]+]], r5, r3
+; P9LE-DAG: mtvsrdd [[REG2:v[0-9]+]], r6, r4
+; P9LE: vmrgow v2, [[REG2]], [[REG1]]
+; P9LE: blr
+; P8BE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
+; P8BE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
+; P8BE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
+; P8BE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
+; P8BE-DAG: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG1]], {{[v][s]*}}[[REG3]]
+; P8BE-DAG: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG2]], {{[v][s]*}}[[REG4]]
+; P8BE: vmrgow v2, [[REG5]], [[REG6]]
+; P8LE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
+; P8LE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
+; P8LE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
+; P8LE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
+; P8LE: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG3]], {{[v][s]*}}[[REG1]]
+; P8LE: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG4]], {{[v][s]*}}[[REG2]]
+; P8LE: vmrgow v2, [[REG6]], [[REG5]]
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @fromDiffConstsui() {
+entry:
+  ret <4 x i32>
+; P9BE-LABEL: fromDiffConstsui
+; P9LE-LABEL: fromDiffConstsui
+; P8BE-LABEL: fromDiffConstsui
+; P8LE-LABEL: fromDiffConstsui
+; P9BE: lxvx
+; P9BE: blr
+; P9LE: lxvx
+; P9LE: blr
+; P8BE: lxvw4x
+; P8BE: blr
+; P8LE: lxvd2x
+; P8LE: xxswapd
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @fromDiffMemConsAui(i32* nocapture readonly %arr) {
+entry:
+  %0 = load i32, i32* %arr, align 4
+  %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
+  %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 1
+  %1 = load i32, i32* %arrayidx1, align 4
+  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
+  %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2
+  %2 = load i32, i32* %arrayidx3, align 4
+  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
+  %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 3
+  %3 = load i32, i32* %arrayidx5, align 4
+  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32
3 + ret <4 x i32> %vecinit6 +; P9BE-LABEL: fromDiffMemConsAui +; P9LE-LABEL: fromDiffMemConsAui +; P8BE-LABEL: fromDiffMemConsAui +; P8LE-LABEL: fromDiffMemConsAui +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvw4x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemConsDui(i32* nocapture readonly %arr) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2 + %1 = load i32, i32* %arrayidx1, align 4 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 1 + %2 = load i32, i32* %arrayidx3, align 4 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 + %3 = load i32, i32* %arr, align 4 + %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 + ret <4 x i32> %vecinit6 +; P9BE-LABEL: fromDiffMemConsDui +; P9LE-LABEL: fromDiffMemConsDui +; P8BE-LABEL: fromDiffMemConsDui +; P8LE-LABEL: fromDiffMemConsDui +; P9BE: lxvx +; P9BE: lxvx +; P9BE: vperm +; P9BE: blr +; P9LE: lxvx +; P9LE: lxvx +; P9LE: vperm +; P9LE: blr +; P8BE: lxvw4x +; P8BE: lxvw4x +; P8BE: vperm +; P8BE: blr +; P8LE: lxvd2x +; P8LE-DAG: lxvd2x +; P8LE-DAG: xxswapd +; P8LE: xxswapd +; P8LE: vperm +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarAui(i32* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1 + %1 = load i32, i32* %arrayidx2, align 4 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %add4 = add nsw i32 %elem, 2 + %idxprom5 = sext i32 %add4 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5 + %2 = load i32, i32* %arrayidx6, align 4 + %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2 + %add8 = add nsw i32 %elem, 3 + %idxprom9 = sext i32 %add8 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9 + %3 = load i32, i32* %arrayidx10, align 4 + %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 + ret <4 x i32> %vecinit11 +; P9BE-LABEL: fromDiffMemVarAui +; P9LE-LABEL: fromDiffMemVarAui +; P8BE-LABEL: fromDiffMemVarAui +; P8LE-LABEL: fromDiffMemVarAui +; P9BE: sldi r4, r4, 2 +; P9BE: lxvx v2, r3, r4 +; P9BE: blr +; P9LE: sldi r4, r4, 2 +; P9LE: lxvx v2, r3, r4 +; P9LE: blr +; P8BE: sldi r4, r4, 2 +; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4 +; P8BE: blr +; P8LE: sldi r4, r4, 2 +; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4 +; P8LE: xxswapd +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarDui(i32* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1 + %1 = load i32, i32* %arrayidx2, align 4 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %sub4 
= add nsw i32 %elem, -2 + %idxprom5 = sext i32 %sub4 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5 + %2 = load i32, i32* %arrayidx6, align 4 + %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2 + %sub8 = add nsw i32 %elem, -3 + %idxprom9 = sext i32 %sub8 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9 + %3 = load i32, i32* %arrayidx10, align 4 + %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 + ret <4 x i32> %vecinit11 +; P9BE-LABEL: fromDiffMemVarDui +; P9LE-LABEL: fromDiffMemVarDui +; P8BE-LABEL: fromDiffMemVarDui +; P8LE-LABEL: fromDiffMemVarDui +; P9BE: sldi r4, r4, 2 +; P9BE-DAG: lxvx {{[vs0-9]+}}, r3, r4 +; P9BE-DAG: lxvx +; P9BE: vperm +; P9BE: blr +; P9LE: sldi r4, r4, 2 +; P9LE-DAG: lxvx {{[vs0-9]+}}, r3, r4 +; P9LE-DAG: lxvx +; P9LE: vperm +; P9LE: blr +; P8BE: sldi r4, r4, 2 +; P8BE-DAG: lxvw4x {{[vs0-9]+}}, r3, r4 +; P8BE-DAG: lxvw4x +; P8BE: vperm +; P8BE: blr +; P8LE: sldi r4, r4, 2 +; P8LE-DAG: lxvd2x +; P8LE-DAG: lxvd2x +; P8LE-DAG: xxswapd +; P8LE: xxswapd +; P8LE: vperm +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromRandMemConsui(i32* nocapture readonly %arr) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4 + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 18 + %1 = load i32, i32* %arrayidx1, align 4 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2 + %2 = load i32, i32* %arrayidx3, align 4 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 + %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 88 + %3 = load i32, i32* %arrayidx5, align 4 + %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 + ret <4 x i32> %vecinit6 +; P9BE-LABEL: fromRandMemConsui +; P9LE-LABEL: fromRandMemConsui +; P8BE-LABEL: fromRandMemConsui +; P8LE-LABEL: fromRandMemConsui +; P9BE: lwz +; P9BE: lwz +; P9BE: lwz +; P9BE: lwz +; P9BE: mtvsrdd +; P9BE: mtvsrdd +; P9BE: vmrgow +; P9LE: lwz +; P9LE: lwz +; P9LE: lwz +; P9LE: lwz +; P9LE: mtvsrdd +; P9LE: mtvsrdd +; P9LE: vmrgow +; P8BE: lwz +; P8BE: lwz +; P8BE: lwz +; P8BE: lwz +; P8BE: mtvsrwz +; P8BE: mtvsrwz +; P8BE: mtvsrwz +; P8BE: mtvsrwz +; P8BE: xxmrghd +; P8BE: xxmrghd +; P8BE: vmrgow +; P8LE: lwz +; P8LE: lwz +; P8LE: lwz +; P8LE: lwz +; P8LE: mtvsrwz +; P8LE: mtvsrwz +; P8LE: mtvsrwz +; P8LE: mtvsrwz +; P8LE: xxmrghd +; P8LE: xxmrghd +; P8LE: vmrgow +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromRandMemVarui(i32* nocapture readonly %arr, i32 signext %elem) { +entry: + %add = add nsw i32 %elem, 4 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 + %add1 = add nsw i32 %elem, 1 + %idxprom2 = sext i32 %add1 to i64 + %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 %idxprom2 + %1 = load i32, i32* %arrayidx3, align 4 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 + %add5 = add nsw i32 %elem, 2 + %idxprom6 = sext i32 %add5 to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 %idxprom6 + %2 = load i32, i32* %arrayidx7, align 4 + %vecinit8 = insertelement <4 x i32> %vecinit4, i32 %2, i32 2 + %add9 = add nsw i32 %elem, 8 + %idxprom10 = sext i32 %add9 to i64 + %arrayidx11 = getelementptr inbounds i32, i32* 
%arr, i64 %idxprom10
+  %3 = load i32, i32* %arrayidx11, align 4
+  %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3
+  ret <4 x i32> %vecinit12
+; P9BE-LABEL: fromRandMemVarui
+; P9LE-LABEL: fromRandMemVarui
+; P8BE-LABEL: fromRandMemVarui
+; P8LE-LABEL: fromRandMemVarui
+; P9BE: sldi r4, r4, 2
+; P9BE: lwz
+; P9BE: lwz
+; P9BE: lwz
+; P9BE: lwz
+; P9BE: mtvsrdd
+; P9BE: mtvsrdd
+; P9BE: vmrgow
+; P9LE: sldi r4, r4, 2
+; P9LE: lwz
+; P9LE: lwz
+; P9LE: lwz
+; P9LE: lwz
+; P9LE: mtvsrdd
+; P9LE: mtvsrdd
+; P9LE: vmrgow
+; P8BE: sldi r4, r4, 2
+; P8BE: lwz
+; P8BE: lwz
+; P8BE: lwz
+; P8BE: lwz
+; P8BE: mtvsrwz
+; P8BE: mtvsrwz
+; P8BE: mtvsrwz
+; P8BE: mtvsrwz
+; P8BE: xxmrghd
+; P8BE: xxmrghd
+; P8BE: vmrgow
+; P8LE: sldi r4, r4, 2
+; P8LE: lwz
+; P8LE: lwz
+; P8LE: lwz
+; P8LE: lwz
+; P8LE: mtvsrwz
+; P8LE: mtvsrwz
+; P8LE: mtvsrwz
+; P8LE: mtvsrwz
+; P8LE: xxmrghd
+; P8LE: xxmrghd
+; P8LE: vmrgow
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltRegValui(i32 zeroext %val) {
+entry:
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; P9BE-LABEL: spltRegValui
+; P9LE-LABEL: spltRegValui
+; P8BE-LABEL: spltRegValui
+; P8LE-LABEL: spltRegValui
+; P9BE: mtvsrws v2, r3
+; P9BE: blr
+; P9LE: mtvsrws v2, r3
+; P9LE: blr
+; P8BE: mtvsrwz {{[vsf0-9]+}}, r3
+; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P8BE: blr
+; P8LE: mtvsrwz {{[vsf0-9]+}}, r3
+; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {
+entry:
+  %0 = load i32, i32* %ptr, align 4
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; P9BE-LABEL: spltMemValui
+; P9LE-LABEL: spltMemValui
+; P8BE-LABEL: spltMemValui
+; P8LE-LABEL: spltMemValui
+; P9BE: lxvwsx v2, 0, r3
+; P9BE: blr
+; P9LE: lxvwsx v2, 0, r3
+; P9LE: blr
+; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
+; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P8BE: blr
+; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
+; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @spltCnstConvftoui() {
+entry:
+  ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+; P9BE-LABEL: spltCnstConvftoui
+; P9LE-LABEL: spltCnstConvftoui
+; P8BE-LABEL: spltCnstConvftoui
+; P8LE-LABEL: spltCnstConvftoui
+; P9BE: vspltisw v2, 4
+; P9BE: blr
+; P9LE: vspltisw v2, 4
+; P9LE: blr
+; P8BE: vspltisw v2, 4
+; P8BE: blr
+; P8LE: vspltisw v2, 4
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
+entry:
+  %conv = fptoui float %a to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %conv1 = fptoui float %b to i32
+  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
+  %conv3 = fptoui float %c to i32
+  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
+  %conv5 = fptoui float %d to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
+  ret <4 x i32> %vecinit6
+; P9BE-LABEL: fromRegsConvftoui
+; P9LE-LABEL: fromRegsConvftoui
+; P8BE-LABEL: fromRegsConvftoui
+; P8LE-LABEL: fromRegsConvftoui
+; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE-DAG: xvcvdpsp
[[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9BE: vmrgew v2, [[REG3]], [[REG4]] +; P9BE: xvcvspuxws v2, v2 +; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 +; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 +; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9LE: vmrgew v2, [[REG4]], [[REG3]] +; P9LE: xvcvspuxws v2, v2 +; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 +; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 +; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8BE: vmrgew v2, [[REG3]], [[REG4]] +; P8BE: xvcvspuxws v2, v2 +; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 +; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 +; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8LE: vmrgew v2, [[REG4]], [[REG3]] +; P8LE: xvcvspuxws v2, v2 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x i32> @fromDiffConstsConvftoui() { +entry: + ret <4 x i32> +; P9BE-LABEL: fromDiffConstsConvftoui +; P9LE-LABEL: fromDiffConstsConvftoui +; P8BE-LABEL: fromDiffConstsConvftoui +; P8LE-LABEL: fromDiffConstsConvftoui +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvw4x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemConsAConvftoui(float* nocapture readonly %ptr) { +entry: + %0 = bitcast float* %ptr to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = fptoui <4 x float> %1 to <4 x i32> + ret <4 x i32> %2 +; P9BE-LABEL: fromDiffMemConsAConvftoui +; P9LE-LABEL: fromDiffMemConsAConvftoui +; P8BE-LABEL: fromDiffMemConsAConvftoui +; P8LE-LABEL: fromDiffMemConsAConvftoui +; P9BE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9BE: xvcvspuxws v2, [[REG1]] +; P9BE: blr +; P9LE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9LE: xvcvspuxws v2, [[REG1]] +; P9LE: blr +; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3 +; P8BE: xvcvspuxws v2, [[REG1]] +; P8BE: blr +; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 +; P8LE: xxswapd v2, [[REG1]] +; P8LE: xvcvspuxws v2, v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemConsDConvftoui(float* nocapture readonly %ptr) { +entry: + %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 + %0 = load float, float* %arrayidx, align 4 + %conv = fptoui float %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2 + %1 = load float, float* %arrayidx1, align 4 + %conv2 = fptoui float %1 to i32 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 + %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 1 + %2 = load float, float* %arrayidx4, align 4 + %conv5 = fptoui float %2 to i32 + %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 + %3 = load float, float* %ptr, align 4 + %conv8 = fptoui float %3 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 + ret <4 x i32> %vecinit9 +; P9BE-LABEL: fromDiffMemConsDConvftoui +; P9LE-LABEL: fromDiffMemConsDConvftoui +; P8BE-LABEL: fromDiffMemConsDConvftoui +; P8LE-LABEL: fromDiffMemConsDConvftoui +; P9BE: lxvx +; P9BE: lxvx +; P9BE: vperm +; P9BE: xvcvspuxws +; P9BE: blr +; P9LE: lxvx +; P9LE: lxvx +; P9LE: vperm +; P9LE: xvcvspuxws +; P9LE: blr +; P8BE: lxvw4x +; P8BE: lxvw4x +; 
P8BE: vperm +; P8BE: xvcvspuxws +; P8BE: blr +; P8LE: lxvd2x +; P8LE-DAG: lxvd2x +; P8LE-DAG: xxswapd +; P8LE: xxswapd +; P8LE: vperm +; P8LE: xvcvspuxws +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarAConvftoui(float* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fptoui float %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 + %1 = load float, float* %arrayidx2, align 4 + %conv3 = fptoui float %1 to i32 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 + %add5 = add nsw i32 %elem, 2 + %idxprom6 = sext i32 %add5 to i64 + %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6 + %2 = load float, float* %arrayidx7, align 4 + %conv8 = fptoui float %2 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 + %add10 = add nsw i32 %elem, 3 + %idxprom11 = sext i32 %add10 to i64 + %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11 + %3 = load float, float* %arrayidx12, align 4 + %conv13 = fptoui float %3 to i32 + %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 + ret <4 x i32> %vecinit14 +; P9BE-LABEL: fromDiffMemVarAConvftoui +; P9LE-LABEL: fromDiffMemVarAConvftoui +; P8BE-LABEL: fromDiffMemVarAConvftoui +; P8LE-LABEL: fromDiffMemVarAConvftoui +; FIXME: implement finding consecutive loads with pre-inc +; P9BE: lfsux +; P9LE: lfsux +; P8BE: lfsux +; P8LE: lfsux +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarDConvftoui(float* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fptoui float %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 + %1 = load float, float* %arrayidx2, align 4 + %conv3 = fptoui float %1 to i32 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 + %sub5 = add nsw i32 %elem, -2 + %idxprom6 = sext i32 %sub5 to i64 + %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6 + %2 = load float, float* %arrayidx7, align 4 + %conv8 = fptoui float %2 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 + %sub10 = add nsw i32 %elem, -3 + %idxprom11 = sext i32 %sub10 to i64 + %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11 + %3 = load float, float* %arrayidx12, align 4 + %conv13 = fptoui float %3 to i32 + %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 + ret <4 x i32> %vecinit14 +; P9BE-LABEL: fromDiffMemVarDConvftoui +; P9LE-LABEL: fromDiffMemVarDConvftoui +; P8BE-LABEL: fromDiffMemVarDConvftoui +; P8LE-LABEL: fromDiffMemVarDConvftoui +; FIXME: implement finding consecutive loads with pre-inc +; P9BE: lfsux +; P9LE: lfsux +; P8BE: lfsux +; P8LE: lfsux +} + +; Function Attrs: norecurse nounwind readnone +define <4 x i32> @spltRegValConvftoui(float %val) { +entry: + %conv = fptoui float %val to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = 
shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; P9BE-LABEL: spltRegValConvftoui +; P9LE-LABEL: spltRegValConvftoui +; P8BE-LABEL: spltRegValConvftoui +; P8LE-LABEL: spltRegValConvftoui +; P9BE: xscvdpuxws f[[REG1:[0-9]+]], f1 +; P9BE: xxspltw v2, vs[[REG1]], 1 +; P9BE: blr +; P9LE: xscvdpuxws f[[REG1:[0-9]+]], f1 +; P9LE: xxspltw v2, vs[[REG1]], 1 +; P9LE: blr +; P8BE: xscvdpuxws f[[REG1:[0-9]+]], f1 +; P8BE: xxspltw v2, vs[[REG1]], 1 +; P8BE: blr +; P8LE: xscvdpuxws f[[REG1:[0-9]+]], f1 +; P8LE: xxspltw v2, vs[[REG1]], 1 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @spltMemValConvftoui(float* nocapture readonly %ptr) { +entry: + %0 = load float, float* %ptr, align 4 + %conv = fptoui float %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; P9BE-LABEL: spltMemValConvftoui +; P9LE-LABEL: spltMemValConvftoui +; P8BE-LABEL: spltMemValConvftoui +; P8LE-LABEL: spltMemValConvftoui +; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3 +; P9BE: xvcvspuxws v2, [[REG1]] +; P9LE: lxvwsx [[REG1:[vs0-9]+]], 0, r3 +; P9LE: xvcvspuxws v2, [[REG1]] +; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3 +; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]] +; P8BE: xxspltw v2, vs[[REG2]], 1 +; P8LE: lxsspx [[REG1:f[0-9]+]], 0, r3 +; P8LE: xscvdpuxws f[[REG2:[vs0-9]+]], [[REG1]] +; P8LE: xxspltw v2, vs[[REG2]], 1 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x i32> @spltCnstConvdtoui() { +entry: + ret <4 x i32> <i32 4, i32 4, i32 4, i32 4> +; P9BE-LABEL: spltCnstConvdtoui +; P9LE-LABEL: spltCnstConvdtoui +; P8BE-LABEL: spltCnstConvdtoui +; P8LE-LABEL: spltCnstConvdtoui +; P9BE: vspltisw v2, 4 +; P9BE: blr +; P9LE: vspltisw v2, 4 +; P9LE: blr +; P8BE: vspltisw v2, 4 +; P8BE: blr +; P8LE: vspltisw v2, 4 +; P8LE: blr +} + 
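+; A rough C equivalent of the next test (illustrative only; the C-level
+; source and parameter names are assumptions, not part of this patch):
+;   vector unsigned int fromRegsConvdtoui(double a, double b, double c,
+;                                         double d) {
+;     return (vector unsigned int){ a, b, c, d };
+;   }
+; Four doubles already in registers are paired with xxmrghd, each pair is
+; converted with xvcvdpsp, the halves are interleaved with vmrgew, and a
+; single xvcvspuxws produces the v4i32, as the checks below verify.
+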
+; Function Attrs: norecurse nounwind readnone +define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) { +entry: + %conv = fptoui double %a to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %conv1 = fptoui double %b to i32 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1 + %conv3 = fptoui double %c to i32 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2 + %conv5 = fptoui double %d to i32 + %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3 + ret <4 x i32> %vecinit6 +; P9BE-LABEL: fromRegsConvdtoui +; P9LE-LABEL: fromRegsConvdtoui +; P8BE-LABEL: fromRegsConvdtoui +; P8LE-LABEL: fromRegsConvdtoui +; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 +; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 +; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9BE: vmrgew v2, [[REG3]], [[REG4]] +; P9BE: xvcvspuxws v2, v2 +; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 +; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 +; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9LE: vmrgew v2, [[REG4]], [[REG3]] +; P9LE: xvcvspuxws v2, v2 +; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 +; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 +; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8BE: vmrgew v2, [[REG3]], [[REG4]] +; P8BE: xvcvspuxws v2, v2 +; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 +; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 +; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8LE: vmrgew v2, [[REG4]], [[REG3]] +; P8LE: xvcvspuxws v2, v2 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x i32> @fromDiffConstsConvdtoui() { +entry: + ret <4 x i32> +; P9BE-LABEL: fromDiffConstsConvdtoui +; P9LE-LABEL: fromDiffConstsConvdtoui +; P8BE-LABEL: fromDiffConstsConvdtoui +; P8LE-LABEL: fromDiffConstsConvdtoui +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvw4x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemConsAConvdtoui(double* nocapture readonly %ptr) { +entry: + %0 = bitcast double* %ptr to <2 x double>* + %1 = load <2 x double>, <2 x double>* %0, align 8 + %2 = fptoui <2 x double> %1 to <2 x i32> + %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 2 + %3 = bitcast double* %arrayidx4 to <2 x double>* + %4 = load <2 x double>, <2 x double>* %3, align 8 + %5 = fptoui <2 x double> %4 to <2 x i32> + %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %vecinit9 +; P9BE-LABEL: fromDiffMemConsAConvdtoui +; P9LE-LABEL: fromDiffMemConsAConvdtoui +; P8BE-LABEL: fromDiffMemConsAConvdtoui +; P8LE-LABEL: fromDiffMemConsAConvdtoui +; P9BE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9BE: lxvx [[REG2:[vs0-9]+]], r3, r4 +; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] +; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] +; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] +; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] +; P9BE: vmrgew v2, [[REG6]], [[REG5]] +; P9BE: xvcvspuxws v2, v2 +; P9LE: lxvx [[REG1:[vs0-9]+]], 0, r3 +; P9LE: lxvx [[REG2:[vs0-9]+]], r3, r4 +; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] +; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] +; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] +; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] +; P9LE: vmrgew v2, [[REG6]], [[REG5]] +; P9LE: xvcvspuxws v2, v2 +; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 +; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 +; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] +; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] +; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] +; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] +; P8BE: vmrgew v2, [[REG6]], [[REG5]] +; P8BE: xvcvspuxws v2, v2 +; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 +; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 +; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]] +; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]] +; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]] +; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]] +; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]] +; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]] +; P8LE: vmrgew v2, [[REG8]], [[REG7]] +; P8LE: xvcvspuxws v2, v2 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemConsDConvdtoui(double* nocapture readonly %ptr) { +entry: + %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 + %0 = load double, double* %arrayidx, align 8 + %conv = fptoui double %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2 + %1 = load double, double* %arrayidx1, align 8 + %conv2 = fptoui double %1 to i32 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 + 
%arrayidx4 = getelementptr inbounds double, double* %ptr, i64 1 + %2 = load double, double* %arrayidx4, align 8 + %conv5 = fptoui double %2 to i32 + %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 + %3 = load double, double* %ptr, align 8 + %conv8 = fptoui double %3 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 + ret <4 x i32> %vecinit9 +; P9BE-LABEL: fromDiffMemConsDConvdtoui +; P9LE-LABEL: fromDiffMemConsDConvdtoui +; P8BE-LABEL: fromDiffMemConsDConvdtoui +; P8LE-LABEL: fromDiffMemConsDConvdtoui +; P9BE: lfd +; P9BE: lfd +; P9BE: lfd +; P9BE: lfd +; P9BE: xxmrghd +; P9BE: xxmrghd +; P9BE: xvcvdpsp +; P9BE: xvcvdpsp +; P9BE: vmrgew +; P9BE: xvcvspuxws v2 +; P9LE: lfd +; P9LE: lfd +; P9LE: lfd +; P9LE: lfd +; P9LE: xxmrghd +; P9LE: xxmrghd +; P9LE: xvcvdpsp +; P9LE: xvcvdpsp +; P9LE: vmrgew +; P9LE: xvcvspuxws v2 +; P8BE: lxsdx +; P8BE: lxsdx +; P8BE: lxsdx +; P8BE: lxsdx +; P8BE: xxmrghd +; P8BE: xxmrghd +; P8BE: xvcvdpsp +; P8BE: xvcvdpsp +; P8BE: vmrgew +; P8BE: xvcvspuxws v2 +; P8LE: lxsdx +; P8LE: lxsdx +; P8LE: lxsdx +; P8LE: lxsdx +; P8LE: xxmrghd +; P8LE: xxmrghd +; P8LE: xvcvdpsp +; P8LE: xvcvdpsp +; P8LE: vmrgew +; P8LE: xvcvspuxws v2 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarAConvdtoui(double* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %conv = fptoui double %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 + %1 = load double, double* %arrayidx2, align 8 + %conv3 = fptoui double %1 to i32 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 + %add5 = add nsw i32 %elem, 2 + %idxprom6 = sext i32 %add5 to i64 + %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6 + %2 = load double, double* %arrayidx7, align 8 + %conv8 = fptoui double %2 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 + %add10 = add nsw i32 %elem, 3 + %idxprom11 = sext i32 %add10 to i64 + %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11 + %3 = load double, double* %arrayidx12, align 8 + %conv13 = fptoui double %3 to i32 + %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 + ret <4 x i32> %vecinit14 +; P9BE-LABEL: fromDiffMemVarAConvdtoui +; P9LE-LABEL: fromDiffMemVarAConvdtoui +; P8BE-LABEL: fromDiffMemVarAConvdtoui +; P8LE-LABEL: fromDiffMemVarAConvdtoui +; P9BE: lfdux +; P9BE: lfd +; P9BE: lfd +; P9BE: lfd +; P9BE: xxmrghd +; P9BE: xxmrghd +; P9BE: xvcvdpsp +; P9BE: xvcvdpsp +; P9BE: vmrgew +; P9BE: xvcvspuxws v2 +; P9LE: lfdux +; P9LE: lfd +; P9LE: lfd +; P9LE: lfd +; P9LE: xxmrghd +; P9LE: xxmrghd +; P9LE: xvcvdpsp +; P9LE: xvcvdpsp +; P9LE: vmrgew +; P9LE: xvcvspuxws v2 +; P8BE: lfdux +; P8BE: lxsdx +; P8BE: lxsdx +; P8BE: lxsdx +; P8BE: xxmrghd +; P8BE: xxmrghd +; P8BE: xvcvdpsp +; P8BE: xvcvdpsp +; P8BE: vmrgew +; P8BE: xvcvspuxws v2 +; P8LE: lfdux +; P8LE: lxsdx +; P8LE: lxsdx +; P8LE: lxsdx +; P8LE: xxmrghd +; P8LE: xxmrghd +; P8LE: xvcvdpsp +; P8LE: xvcvdpsp +; P8LE: vmrgew +; P8LE: xvcvspuxws v2 +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @fromDiffMemVarDConvdtoui(double* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = 
getelementptr inbounds double, double* %arr, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %conv = fptoui double %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 + %1 = load double, double* %arrayidx2, align 8 + %conv3 = fptoui double %1 to i32 + %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 + %sub5 = add nsw i32 %elem, -2 + %idxprom6 = sext i32 %sub5 to i64 + %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6 + %2 = load double, double* %arrayidx7, align 8 + %conv8 = fptoui double %2 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 + %sub10 = add nsw i32 %elem, -3 + %idxprom11 = sext i32 %sub10 to i64 + %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11 + %3 = load double, double* %arrayidx12, align 8 + %conv13 = fptoui double %3 to i32 + %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 + ret <4 x i32> %vecinit14 +; P9BE-LABEL: fromDiffMemVarDConvdtoui +; P9LE-LABEL: fromDiffMemVarDConvdtoui +; P8BE-LABEL: fromDiffMemVarDConvdtoui +; P8LE-LABEL: fromDiffMemVarDConvdtoui +; P9BE: lfdux +; P9BE: lfd +; P9BE: lfd +; P9BE: lfd +; P9BE: xxmrghd +; P9BE: xxmrghd +; P9BE: xvcvdpsp +; P9BE: xvcvdpsp +; P9BE: vmrgew +; P9BE: xvcvspuxws v2 +; P9LE: lfdux +; P9LE: lfd +; P9LE: lfd +; P9LE: lfd +; P9LE: xxmrghd +; P9LE: xxmrghd +; P9LE: xvcvdpsp +; P9LE: xvcvdpsp +; P9LE: vmrgew +; P9LE: xvcvspuxws v2 +; P8BE: lfdux +; P8BE: lxsdx +; P8BE: lxsdx +; P8BE: lxsdx +; P8BE: xxmrghd +; P8BE: xxmrghd +; P8BE: xvcvdpsp +; P8BE: xvcvdpsp +; P8BE: vmrgew +; P8BE: xvcvspuxws v2 +; P8LE: lfdux +; P8LE: lxsdx +; P8LE: lxsdx +; P8LE: lxsdx +; P8LE: xxmrghd +; P8LE: xxmrghd +; P8LE: xvcvdpsp +; P8LE: xvcvdpsp +; P8LE: vmrgew +; P8LE: xvcvspuxws v2 +} + +; Function Attrs: norecurse nounwind readnone +define <4 x i32> @spltRegValConvdtoui(double %val) { +entry: + %conv = fptoui double %val to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; P9BE-LABEL: spltRegValConvdtoui +; P9LE-LABEL: spltRegValConvdtoui +; P8BE-LABEL: spltRegValConvdtoui +; P8LE-LABEL: spltRegValConvdtoui +; P9BE: xscvdpuxws +; P9BE: xxspltw +; P9BE: blr +; P9LE: xscvdpuxws +; P9LE: xxspltw +; P9LE: blr +; P8BE: xscvdpuxws +; P8BE: xxspltw +; P8BE: blr +; P8LE: xscvdpuxws +; P8LE: xxspltw +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @spltMemValConvdtoui(double* nocapture readonly %ptr) { +entry: + %0 = load double, double* %ptr, align 8 + %conv = fptoui double %0 to i32 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; P9BE-LABEL: spltMemValConvdtoui +; P9LE-LABEL: spltMemValConvdtoui +; P8BE-LABEL: spltMemValConvdtoui +; P8LE-LABEL: spltMemValConvdtoui +; P9BE: lfd +; P9BE: xscvdpuxws +; P9BE: xxspltw +; P9BE: blr +; P9LE: lfd +; P9LE: xscvdpuxws +; P9LE: xxspltw +; P9LE: blr +; P8BE: lxsdx +; P8BE: xscvdpuxws +; P8BE: xxspltw +; P8BE: blr +; P8LE: lxsdx +; P8LE: xscvdpuxws +; P8LE: xxspltw +; P8LE: blr +} +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @allZeroll() { +entry: + ret <2 x i64> 
zeroinitializer +; P9BE-LABEL: allZeroll +; P9LE-LABEL: allZeroll +; P8BE-LABEL: allZeroll +; P8LE-LABEL: allZeroll +; P9BE: xxlxor v2, v2, v2 +; P9BE: blr +; P9LE: xxlxor v2, v2, v2 +; P9LE: blr +; P8BE: xxlxor v2, v2, v2 +; P8BE: blr +; P8LE: xxlxor v2, v2, v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @allOnell() { +entry: + ret <2 x i64> <i64 -1, i64 -1> +; P9BE-LABEL: allOnell +; P9LE-LABEL: allOnell +; P8BE-LABEL: allOnell +; P8LE-LABEL: allOnell +; P9BE: xxspltib v2, 255 +; P9BE: blr +; P9LE: xxspltib v2, 255 +; P9LE: blr +; P8BE: vspltisb v2, -1 +; P8BE: blr +; P8LE: vspltisb v2, -1 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltConst1ll() { +entry: + ret <2 x i64> <i64 1, i64 1> +; P9BE-LABEL: spltConst1ll +; P9LE-LABEL: spltConst1ll +; P8BE-LABEL: spltConst1ll +; P8LE-LABEL: spltConst1ll +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltConst16kll() { +entry: + ret <2 x i64> +; P9BE-LABEL: spltConst16kll +; P9LE-LABEL: spltConst16kll +; P8BE-LABEL: spltConst16kll +; P8LE-LABEL: spltConst16kll +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltConst32kll() { +entry: + ret <2 x i64> +; P9BE-LABEL: spltConst32kll +; P9LE-LABEL: spltConst32kll +; P8BE-LABEL: spltConst32kll +; P8LE-LABEL: spltConst32kll +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @fromRegsll(i64 %a, i64 %b) { +entry: + %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0 + %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1 + ret <2 x i64> %vecinit1 +; P9BE-LABEL: fromRegsll +; P9LE-LABEL: fromRegsll +; P8BE-LABEL: fromRegsll +; P8LE-LABEL: fromRegsll +; P9BE: mtvsrdd v2, r3, r4 +; P9BE: blr +; P9LE: mtvsrdd v2, r4, r3 +; P9LE: blr +; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3 +; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4 +; P8BE: xxmrghd v2 +; P8BE: blr +; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3 +; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4 +; P8LE: xxmrghd v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @fromDiffConstsll() { +entry: + ret <2 x i64> +; P9BE-LABEL: fromDiffConstsll +; P9LE-LABEL: fromDiffConstsll +; P8BE-LABEL: fromDiffConstsll +; P8LE-LABEL: fromDiffConstsll +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsAll(i64* nocapture readonly %arr) { +entry: + %0 = load i64, i64* %arr, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 1 + %1 = load i64, i64* %arrayidx1, align 8 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit2 +; P9BE-LABEL: fromDiffMemConsAll +; P9LE-LABEL: fromDiffMemConsAll +; P8BE-LABEL: fromDiffMemConsAll +; P8LE-LABEL: fromDiffMemConsAll +; P9BE: lxvx v2 +; P9BE: blr +; P9LE: lxvx v2 +; P9LE: blr +; P8BE: lxvd2x v2 +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsDll(i64* nocapture readonly %arr) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 
3 + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 2 + %1 = load i64, i64* %arrayidx1, align 8 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit2 +; P9BE-LABEL: fromDiffMemConsDll +; P9LE-LABEL: fromDiffMemConsDll +; P8BE-LABEL: fromDiffMemConsDll +; P8LE-LABEL: fromDiffMemConsDll +; P9BE: lxvx v2 +; P9BE: blr +; P9LE: lxvx +; P9LE: xxswapd v2 +; P9LE: blr +; P8BE: lxvd2x +; P8BE: xxswapd v2 +; P8BE-NEXT: blr +; P8LE: lxvd2x v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarAll(i64* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1 + %1 = load i64, i64* %arrayidx2, align 8 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit3 +; P9BE-LABEL: fromDiffMemVarAll +; P9LE-LABEL: fromDiffMemVarAll +; P8BE-LABEL: fromDiffMemVarAll +; P8LE-LABEL: fromDiffMemVarAll +; P9BE: sldi +; P9BE: lxvx v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lxvx v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: lxvd2x v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lxvd2x +; P8LE: xxswapd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarDll(i64* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1 + %1 = load i64, i64* %arrayidx2, align 8 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit3 +; P9BE-LABEL: fromDiffMemVarDll +; P9LE-LABEL: fromDiffMemVarDll +; P8BE-LABEL: fromDiffMemVarDll +; P8LE-LABEL: fromDiffMemVarDll +; P9BE: sldi +; P9BE: lxvx +; P9BE: xxswapd v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lxvx +; P9LE: xxswapd v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: lxvd2x +; P8BE: xxswapd v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lxvd2x v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromRandMemConsll(i64* nocapture readonly %arr) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4 + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 18 + %1 = load i64, i64* %arrayidx1, align 8 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit2 +; P9BE-LABEL: fromRandMemConsll +; P9LE-LABEL: fromRandMemConsll +; P8BE-LABEL: fromRandMemConsll +; P8LE-LABEL: fromRandMemConsll +; P9BE: ld +; P9BE: ld +; P9BE: mtvsrdd v2 +; P9BE-NEXT: blr +; P9LE: ld +; P9LE: ld +; P9LE: mtvsrdd v2 +; P9LE-NEXT: blr +; P8BE: ld +; P8BE: ld +; P8BE-DAG: mtvsrd +; P8BE-DAG: mtvsrd +; P8BE: xxmrghd v2 +; P8BE-NEXT: blr +; P8LE: ld +; P8LE: ld +; P8LE-DAG: mtvsrd +; P8LE-DAG: mtvsrd +; P8LE: xxmrghd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x 
i64> @fromRandMemVarll(i64* nocapture readonly %arr, i32 signext %elem) { +entry: + %add = add nsw i32 %elem, 4 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %add1 = add nsw i32 %elem, 1 + %idxprom2 = sext i32 %add1 to i64 + %arrayidx3 = getelementptr inbounds i64, i64* %arr, i64 %idxprom2 + %1 = load i64, i64* %arrayidx3, align 8 + %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit4 +; P9BE-LABEL: fromRandMemVarll +; P9LE-LABEL: fromRandMemVarll +; P8BE-LABEL: fromRandMemVarll +; P8LE-LABEL: fromRandMemVarll +; P9BE: sldi +; P9BE: ld +; P9BE: ld +; P9BE: mtvsrdd v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: ld +; P9LE: ld +; P9LE: mtvsrdd v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: ld +; P8BE: ld +; P8BE: mtvsrd +; P8BE: mtvsrd +; P8BE: xxmrghd v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: ld +; P8LE: ld +; P8LE: mtvsrd +; P8LE: mtvsrd +; P8LE: xxmrghd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltRegValll(i64 %val) { +entry: + %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltRegValll +; P9LE-LABEL: spltRegValll +; P8BE-LABEL: spltRegValll +; P8LE-LABEL: spltRegValll +; P9BE: mtvsrdd v2, r3, r3 +; P9BE-NEXT: blr +; P9LE: mtvsrdd v2, r3, r3 +; P9LE-NEXT: blr +; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 +; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 +; P8BE-NEXT: blr +; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 +; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @spltMemValll(i64* nocapture readonly %ptr) { +entry: + %0 = load i64, i64* %ptr, align 8 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltMemValll +; P9LE-LABEL: spltMemValll +; P8BE-LABEL: spltMemValll +; P8LE-LABEL: spltMemValll +; P9BE: lxvdsx v2 +; P9BE-NEXT: blr +; P9LE: lxvdsx v2 +; P9LE-NEXT: blr +; P8BE: lxvdsx v2 +; P8BE-NEXT: blr +; P8LE: lxvdsx v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltCnstConvftoll() { +entry: + ret <2 x i64> +; P9BE-LABEL: spltCnstConvftoll +; P9LE-LABEL: spltCnstConvftoll +; P8BE-LABEL: spltCnstConvftoll +; P8LE-LABEL: spltCnstConvftoll +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @fromRegsConvftoll(float %a, float %b) { +entry: + %conv = fptosi float %a to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %conv1 = fptosi float %b to i64 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 + ret <2 x i64> %vecinit2 +; P9BE-LABEL: fromRegsConvftoll +; P9LE-LABEL: fromRegsConvftoll +; P8BE-LABEL: fromRegsConvftoll +; P8LE-LABEL: fromRegsConvftoll +; P9BE: xxmrghd +; P9BE: xvcvdpsxds v2 +; P9BE-NEXT: blr +; P9LE: xxmrghd +; P9LE: xvcvdpsxds v2 +; P9LE-NEXT: blr +; P8BE: xxmrghd +; P8BE: xvcvdpsxds v2 +; P8BE-NEXT: blr +; P8LE: xxmrghd +; P8LE: xvcvdpsxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> 
@fromDiffConstsConvftoll() { +entry: + ret <2 x i64> +; P9BE-LABEL: fromDiffConstsConvftoll +; P9LE-LABEL: fromDiffConstsConvftoll +; P8BE-LABEL: fromDiffConstsConvftoll +; P8LE-LABEL: fromDiffConstsConvftoll +; P9BE: lxvx v2 +; P9BE: blr +; P9LE: lxvx v2 +; P9LE: blr +; P8BE: lxvd2x v2 +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsAConvftoll(float* nocapture readonly %ptr) { +entry: + %0 = load float, float* %ptr, align 4 + %conv = fptosi float %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1 + %1 = load float, float* %arrayidx1, align 4 + %conv2 = fptosi float %1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +; P9BE-LABEL: fromDiffMemConsAConvftoll +; P9LE-LABEL: fromDiffMemConsAConvftoll +; P8BE-LABEL: fromDiffMemConsAConvftoll +; P8LE-LABEL: fromDiffMemConsAConvftoll +; P9BE: lfs +; P9BE: lfs +; P9BE: xxmrghd +; P9BE-NEXT: xvcvdpsxds v2 +; P9BE-NEXT: blr +; P9LE: lfs +; P9LE: lfs +; P9LE: xxmrghd +; P9LE-NEXT: xvcvdpsxds v2 +; P9LE-NEXT: blr +; P8BE: lxsspx +; P8BE: lxsspx +; P8BE: xxmrghd +; P8BE-NEXT: xvcvdpsxds v2 +; P8BE-NEXT: blr +; P8LE: lxsspx +; P8LE: lxsspx +; P8LE: xxmrghd +; P8LE-NEXT: xvcvdpsxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsDConvftoll(float* nocapture readonly %ptr) { +entry: + %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 + %0 = load float, float* %arrayidx, align 4 + %conv = fptosi float %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2 + %1 = load float, float* %arrayidx1, align 4 + %conv2 = fptosi float %1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +; P9BE-LABEL: fromDiffMemConsDConvftoll +; P9LE-LABEL: fromDiffMemConsDConvftoll +; P8BE-LABEL: fromDiffMemConsDConvftoll +; P8LE-LABEL: fromDiffMemConsDConvftoll +; P9BE: lfs +; P9BE: lfs +; P9BE: xxmrghd +; P9BE-NEXT: xvcvdpsxds v2 +; P9BE-NEXT: blr +; P9LE: lfs +; P9LE: lfs +; P9LE: xxmrghd +; P9LE-NEXT: xvcvdpsxds v2 +; P9LE-NEXT: blr +; P8BE: lxsspx +; P8BE: lxsspx +; P8BE: xxmrghd +; P8BE-NEXT: xvcvdpsxds v2 +; P8BE-NEXT: blr +; P8LE: lxsspx +; P8LE: lxsspx +; P8LE: xxmrghd +; P8LE-NEXT: xvcvdpsxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarAConvftoll(float* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fptosi float %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 + %1 = load float, float* %arrayidx2, align 4 + %conv3 = fptosi float %1 to i64 + %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 + ret <2 x i64> %vecinit4 +; P9BE-LABEL: fromDiffMemVarAConvftoll +; P9LE-LABEL: fromDiffMemVarAConvftoll +; P8BE-LABEL: fromDiffMemVarAConvftoll +; P8LE-LABEL: fromDiffMemVarAConvftoll +; P9BE: sldi +; P9BE: lfsux +; P9BE: lfs +; P9BE: xxmrghd +; P9BE-NEXT: xvcvdpsxds v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lfsux +; P9LE: lfs +; P9LE: xxmrghd +; P9LE-NEXT: 
xvcvdpsxds v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: lfsux +; P8BE: lxsspx +; P8BE: xxmrghd +; P8BE-NEXT: xvcvdpsxds v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lfsux +; P8LE: lxsspx +; P8LE: xxmrghd +; P8LE-NEXT: xvcvdpsxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarDConvftoll(float* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fptosi float %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 + %1 = load float, float* %arrayidx2, align 4 + %conv3 = fptosi float %1 to i64 + %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 + ret <2 x i64> %vecinit4 +; P9BE-LABEL: fromDiffMemVarDConvftoll +; P9LE-LABEL: fromDiffMemVarDConvftoll +; P8BE-LABEL: fromDiffMemVarDConvftoll +; P8LE-LABEL: fromDiffMemVarDConvftoll +; P9BE: sldi +; P9BE: lfsux +; P9BE: lfs +; P9BE: xxmrghd +; P9BE-NEXT: xvcvdpsxds v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lfsux +; P9LE: lfs +; P9LE: xxmrghd +; P9LE-NEXT: xvcvdpsxds v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: lfsux +; P8BE: lxsspx +; P8BE: xxmrghd +; P8BE-NEXT: xvcvdpsxds v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lfsux +; P8LE: lxsspx +; P8LE: xxmrghd +; P8LE-NEXT: xvcvdpsxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltRegValConvftoll(float %val) { +entry: + %conv = fptosi float %val to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltRegValConvftoll +; P9LE-LABEL: spltRegValConvftoll +; P8BE-LABEL: spltRegValConvftoll +; P8LE-LABEL: spltRegValConvftoll +; P9BE: xscvdpsxds +; P9BE-NEXT: xxspltd v2 +; P9BE-NEXT: blr +; P9LE: xscvdpsxds +; P9LE-NEXT: xxspltd v2 +; P9LE-NEXT: blr +; P8BE: xscvdpsxds +; P8BE-NEXT: xxspltd v2 +; P8BE-NEXT: blr +; P8LE: xscvdpsxds +; P8LE-NEXT: xxspltd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) { +entry: + %0 = load float, float* %ptr, align 4 + %conv = fptosi float %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltMemValConvftoll +; P9LE-LABEL: spltMemValConvftoll +; P8BE-LABEL: spltMemValConvftoll +; P8LE-LABEL: spltMemValConvftoll +; P9BE: lfs +; P9BE-NEXT: xscvdpsxds +; P9BE-NEXT: xxspltd v2 +; P9BE-NEXT: blr +; P9LE: lfs +; P9LE-NEXT: xscvdpsxds +; P9LE-NEXT: xxspltd v2 +; P9LE-NEXT: blr +; P8BE: lxsspx +; P8BE-NEXT: xscvdpsxds +; P8BE-NEXT: xxspltd v2 +; P8BE-NEXT: blr +; P8LE: lxsspx +; P8LE-NEXT: xscvdpsxds +; P8LE-NEXT: xxspltd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltCnstConvdtoll() { +entry: + ret <2 x i64> +; P9BE-LABEL: spltCnstConvdtoll +; P9LE-LABEL: spltCnstConvdtoll +; P8BE-LABEL: spltCnstConvdtoll +; P8LE-LABEL: spltCnstConvdtoll +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: 
norecurse nounwind readnone +define <2 x i64> @fromRegsConvdtoll(double %a, double %b) { +entry: + %conv = fptosi double %a to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %conv1 = fptosi double %b to i64 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 + ret <2 x i64> %vecinit2 +; P9BE-LABEL: fromRegsConvdtoll +; P9LE-LABEL: fromRegsConvdtoll +; P8BE-LABEL: fromRegsConvdtoll +; P8LE-LABEL: fromRegsConvdtoll +; P9BE: xxmrghd +; P9BE-NEXT: xvcvdpsxds +; P9BE-NEXT: blr +; P9LE: xxmrghd +; P9LE-NEXT: xvcvdpsxds +; P9LE-NEXT: blr +; P8BE: xxmrghd +; P8BE-NEXT: xvcvdpsxds +; P8BE-NEXT: blr +; P8LE: xxmrghd +; P8LE-NEXT: xvcvdpsxds +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @fromDiffConstsConvdtoll() { +entry: + ret <2 x i64> +; P9BE-LABEL: fromDiffConstsConvdtoll +; P9LE-LABEL: fromDiffConstsConvdtoll +; P8BE-LABEL: fromDiffConstsConvdtoll +; P8LE-LABEL: fromDiffConstsConvdtoll +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsAConvdtoll(double* nocapture readonly %ptr) { +entry: + %0 = bitcast double* %ptr to <2 x double>* + %1 = load <2 x double>, <2 x double>* %0, align 8 + %2 = fptosi <2 x double> %1 to <2 x i64> + ret <2 x i64> %2 +; P9BE-LABEL: fromDiffMemConsAConvdtoll +; P9LE-LABEL: fromDiffMemConsAConvdtoll +; P8BE-LABEL: fromDiffMemConsAConvdtoll +; P8LE-LABEL: fromDiffMemConsAConvdtoll +; P9BE: lxvx +; P9BE-NEXT: xvcvdpsxds v2 +; P9BE-NEXT: blr +; P9LE: lxvx +; P9LE-NEXT: xvcvdpsxds v2 +; P9LE-NEXT: blr +; P8BE: lxvd2x +; P8BE-NEXT: xvcvdpsxds v2 +; P8BE-NEXT: blr +; P8LE: lxvd2x +; P8LE: xxswapd +; P8LE-NEXT: xvcvdpsxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsDConvdtoll(double* nocapture readonly %ptr) { +entry: + %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 + %0 = load double, double* %arrayidx, align 8 + %conv = fptosi double %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2 + %1 = load double, double* %arrayidx1, align 8 + %conv2 = fptosi double %1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +; P9BE-LABEL: fromDiffMemConsDConvdtoll +; P9LE-LABEL: fromDiffMemConsDConvdtoll +; P8BE-LABEL: fromDiffMemConsDConvdtoll +; P8LE-LABEL: fromDiffMemConsDConvdtoll +; P9BE: lxvx +; P9BE-NEXT: xxswapd +; P9BE-NEXT: xvcvdpsxds v2 +; P9BE-NEXT: blr +; P9LE: lxvx +; P9LE-NEXT: xxswapd +; P9LE-NEXT: xvcvdpsxds v2 +; P9LE-NEXT: blr +; P8BE: lxvd2x +; P8BE-NEXT: xxswapd +; P8BE-NEXT: xvcvdpsxds v2 +; P8BE-NEXT: blr +; P8LE: lxvd2x +; P8LE-NEXT: xvcvdpsxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarAConvdtoll(double* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %conv = fptosi double %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 + %1 = load double, double* %arrayidx2, align 8 + %conv3 = fptosi double %1 to i64 + %vecinit4 = insertelement <2 x i64> %vecinit, i64 
%conv3, i32 1 + ret <2 x i64> %vecinit4 +; P9BE-LABEL: fromDiffMemVarAConvdtoll +; P9LE-LABEL: fromDiffMemVarAConvdtoll +; P8BE-LABEL: fromDiffMemVarAConvdtoll +; P8LE-LABEL: fromDiffMemVarAConvdtoll +; P9BE: sldi +; P9BE: lxvx +; P9BE-NEXT: xvcvdpsxds v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lxvx +; P9LE-NEXT: xvcvdpsxds v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: lxvd2x +; P8BE-NEXT: xvcvdpsxds v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lxvd2x +; P8LE-NEXT: xxswapd +; P8LE-NEXT: xvcvdpsxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarDConvdtoll(double* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %conv = fptosi double %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 + %1 = load double, double* %arrayidx2, align 8 + %conv3 = fptosi double %1 to i64 + %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 + ret <2 x i64> %vecinit4 +; P9BE-LABEL: fromDiffMemVarDConvdtoll +; P9LE-LABEL: fromDiffMemVarDConvdtoll +; P8BE-LABEL: fromDiffMemVarDConvdtoll +; P8LE-LABEL: fromDiffMemVarDConvdtoll +; P9BE: sldi +; P9BE: lxvx +; P9BE-NEXT: xxswapd +; P9BE-NEXT: xvcvdpsxds v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lxvx +; P9LE-NEXT: xxswapd +; P9LE-NEXT: xvcvdpsxds v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: lxvd2x +; P8BE-NEXT: xxswapd +; P8BE-NEXT: xvcvdpsxds v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lxvd2x +; P8LE-NEXT: xvcvdpsxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltRegValConvdtoll(double %val) { +entry: + %conv = fptosi double %val to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltRegValConvdtoll +; P9LE-LABEL: spltRegValConvdtoll +; P8BE-LABEL: spltRegValConvdtoll +; P8LE-LABEL: spltRegValConvdtoll +; P9BE: xscvdpsxds +; P9BE-NEXT: xxspltd v2 +; P9BE-NEXT: blr +; P9LE: xscvdpsxds +; P9LE-NEXT: xxspltd v2 +; P9LE-NEXT: blr +; P8BE: xscvdpsxds +; P8BE-NEXT: xxspltd v2 +; P8BE-NEXT: blr +; P8LE: xscvdpsxds +; P8LE-NEXT: xxspltd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @spltMemValConvdtoll(double* nocapture readonly %ptr) { +entry: + %0 = load double, double* %ptr, align 8 + %conv = fptosi double %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltMemValConvdtoll +; P9LE-LABEL: spltMemValConvdtoll +; P8BE-LABEL: spltMemValConvdtoll +; P8LE-LABEL: spltMemValConvdtoll +; P9BE: lxvdsx +; P9BE-NEXT: xvcvdpsxds +; P9BE-NEXT: blr +; P9LE: lxvdsx +; P9LE-NEXT: xvcvdpsxds +; P9LE-NEXT: blr +; P8BE: lxvdsx +; P8BE-NEXT: xvcvdpsxds +; P8BE-NEXT: blr +; P8LE: lxvdsx +; P8LE-NEXT: xvcvdpsxds +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @allZeroull() { +entry: + ret <2 x i64> zeroinitializer +; P9BE-LABEL: allZeroull +; P9LE-LABEL: allZeroull +; P8BE-LABEL: allZeroull +; P8LE-LABEL: allZeroull +; P9BE: xxlxor 
v2, v2, v2 +; P9BE: blr +; P9LE: xxlxor v2, v2, v2 +; P9LE: blr +; P8BE: xxlxor v2, v2, v2 +; P8BE: blr +; P8LE: xxlxor v2, v2, v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @allOneull() { +entry: + ret <2 x i64> <i64 -1, i64 -1> +; P9BE-LABEL: allOneull +; P9LE-LABEL: allOneull +; P8BE-LABEL: allOneull +; P8LE-LABEL: allOneull +; P9BE: xxspltib v2, 255 +; P9BE: blr +; P9LE: xxspltib v2, 255 +; P9LE: blr +; P8BE: vspltisb v2, -1 +; P8BE: blr +; P8LE: vspltisb v2, -1 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltConst1ull() { +entry: + ret <2 x i64> <i64 1, i64 1> +; P9BE-LABEL: spltConst1ull +; P9LE-LABEL: spltConst1ull +; P8BE-LABEL: spltConst1ull +; P8LE-LABEL: spltConst1ull +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltConst16kull() { +entry: + ret <2 x i64> +; P9BE-LABEL: spltConst16kull +; P9LE-LABEL: spltConst16kull +; P8BE-LABEL: spltConst16kull +; P8LE-LABEL: spltConst16kull +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltConst32kull() { +entry: + ret <2 x i64> +; P9BE-LABEL: spltConst32kull +; P9LE-LABEL: spltConst32kull +; P8BE-LABEL: spltConst32kull +; P8LE-LABEL: spltConst32kull +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @fromRegsull(i64 %a, i64 %b) { +entry: + %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0 + %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1 + ret <2 x i64> %vecinit1 +; P9BE-LABEL: fromRegsull +; P9LE-LABEL: fromRegsull +; P8BE-LABEL: fromRegsull +; P8LE-LABEL: fromRegsull +; P9BE: mtvsrdd v2, r3, r4 +; P9BE: blr +; P9LE: mtvsrdd v2, r4, r3 +; P9LE: blr +; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3 +; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4 +; P8BE: xxmrghd v2 +; P8BE: blr +; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3 +; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4 +; P8LE: xxmrghd v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @fromDiffConstsull() { +entry: + ret <2 x i64> +; P9BE-LABEL: fromDiffConstsull +; P9LE-LABEL: fromDiffConstsull +; P8BE-LABEL: fromDiffConstsull +; P8LE-LABEL: fromDiffConstsull +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsAull(i64* nocapture readonly %arr) { +entry: + %0 = load i64, i64* %arr, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 1 + %1 = load i64, i64* %arrayidx1, align 8 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit2 +; P9BE-LABEL: fromDiffMemConsAull +; P9LE-LABEL: fromDiffMemConsAull +; P8BE-LABEL: fromDiffMemConsAull +; P8LE-LABEL: fromDiffMemConsAull +; P9BE: lxvx v2 +; P9BE: blr +; P9LE: lxvx v2 +; P9LE: blr +; P8BE: lxvd2x v2 +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsDull(i64* nocapture readonly %arr) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3 + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, 
i32 0 + %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 2 + %1 = load i64, i64* %arrayidx1, align 8 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit2 +; P9BE-LABEL: fromDiffMemConsDull +; P9LE-LABEL: fromDiffMemConsDull +; P8BE-LABEL: fromDiffMemConsDull +; P8LE-LABEL: fromDiffMemConsDull +; P9BE: lxvx v2 +; P9BE: blr +; P9LE: lxvx +; P9LE: xxswapd v2 +; P9LE: blr +; P8BE: lxvd2x +; P8BE: xxswapd v2 +; P8BE-NEXT: blr +; P8LE: lxvd2x v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarAull(i64* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1 + %1 = load i64, i64* %arrayidx2, align 8 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit3 +; P9BE-LABEL: fromDiffMemVarAull +; P9LE-LABEL: fromDiffMemVarAull +; P8BE-LABEL: fromDiffMemVarAull +; P8LE-LABEL: fromDiffMemVarAull +; P9BE: sldi +; P9BE: lxvx v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lxvx v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: lxvd2x v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lxvd2x +; P8LE: xxswapd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarDull(i64* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1 + %1 = load i64, i64* %arrayidx2, align 8 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit3 +; P9BE-LABEL: fromDiffMemVarDull +; P9LE-LABEL: fromDiffMemVarDull +; P8BE-LABEL: fromDiffMemVarDull +; P8LE-LABEL: fromDiffMemVarDull +; P9BE: sldi +; P9BE: lxvx +; P9BE: xxswapd v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lxvx +; P9LE: xxswapd v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: lxvd2x +; P8BE: xxswapd v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lxvd2x v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromRandMemConsull(i64* nocapture readonly %arr) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4 + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 18 + %1 = load i64, i64* %arrayidx1, align 8 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit2 +; P9BE-LABEL: fromRandMemConsull +; P9LE-LABEL: fromRandMemConsull +; P8BE-LABEL: fromRandMemConsull +; P8LE-LABEL: fromRandMemConsull +; P9BE: ld +; P9BE: ld +; P9BE: mtvsrdd v2 +; P9BE-NEXT: blr +; P9LE: ld +; P9LE: ld +; P9LE: mtvsrdd v2 +; P9LE-NEXT: blr +; P8BE: ld +; P8BE: ld +; P8BE-DAG: mtvsrd +; P8BE-DAG: mtvsrd +; P8BE: xxmrghd v2 +; P8BE-NEXT: blr +; P8LE: ld +; P8LE: ld +; P8LE-DAG: mtvsrd +; P8LE-DAG: mtvsrd +; P8LE: xxmrghd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromRandMemVarull(i64* nocapture readonly %arr, i32 signext %elem) { 
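+; A rough C equivalent of this test (illustrative only; the C-level source
+; is an assumption, not part of this patch):
+;   vector unsigned long long fromRandMemVarull(unsigned long long *arr,
+;                                               int elem) {
+;     return (vector unsigned long long){ arr[elem + 4], arr[elem + 1] };
+;   }
+; The two indices are unrelated, so no single vector load covers both
+; elements: P9 builds the vector with two ld and one mtvsrdd, while P8 moves
+; each ld result with mtvsrd and merges with xxmrghd, as checked below.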
+entry: + %add = add nsw i32 %elem, 4 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 + %add1 = add nsw i32 %elem, 1 + %idxprom2 = sext i32 %add1 to i64 + %arrayidx3 = getelementptr inbounds i64, i64* %arr, i64 %idxprom2 + %1 = load i64, i64* %arrayidx3, align 8 + %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 + ret <2 x i64> %vecinit4 +; P9BE-LABEL: fromRandMemVarull +; P9LE-LABEL: fromRandMemVarull +; P8BE-LABEL: fromRandMemVarull +; P8LE-LABEL: fromRandMemVarull +; P9BE: sldi +; P9BE: ld +; P9BE: ld +; P9BE: mtvsrdd v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: ld +; P9LE: ld +; P9LE: mtvsrdd v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: ld +; P8BE: ld +; P8BE: mtvsrd +; P8BE: mtvsrd +; P8BE: xxmrghd v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: ld +; P8LE: ld +; P8LE: mtvsrd +; P8LE: mtvsrd +; P8LE: xxmrghd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltRegValull(i64 %val) { +entry: + %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltRegValull +; P9LE-LABEL: spltRegValull +; P8BE-LABEL: spltRegValull +; P8LE-LABEL: spltRegValull +; P9BE: mtvsrdd v2, r3, r3 +; P9BE-NEXT: blr +; P9LE: mtvsrdd v2, r3, r3 +; P9LE-NEXT: blr +; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 +; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 +; P8BE-NEXT: blr +; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 +; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @spltMemValull(i64* nocapture readonly %ptr) { +entry: + %0 = load i64, i64* %ptr, align 8 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltMemValull +; P9LE-LABEL: spltMemValull +; P8BE-LABEL: spltMemValull +; P8LE-LABEL: spltMemValull +; P9BE: lxvdsx v2 +; P9BE-NEXT: blr +; P9LE: lxvdsx v2 +; P9LE-NEXT: blr +; P8BE: lxvdsx v2 +; P8BE-NEXT: blr +; P8LE: lxvdsx v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltCnstConvftoull() { +entry: + ret <2 x i64> +; P9BE-LABEL: spltCnstConvftoull +; P9LE-LABEL: spltCnstConvftoull +; P8BE-LABEL: spltCnstConvftoull +; P8LE-LABEL: spltCnstConvftoull +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @fromRegsConvftoull(float %a, float %b) { +entry: + %conv = fptoui float %a to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %conv1 = fptoui float %b to i64 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 + ret <2 x i64> %vecinit2 +; P9BE-LABEL: fromRegsConvftoull +; P9LE-LABEL: fromRegsConvftoull +; P8BE-LABEL: fromRegsConvftoull +; P8LE-LABEL: fromRegsConvftoull +; P9BE: xxmrghd +; P9BE: xvcvdpuxds v2 +; P9BE-NEXT: blr +; P9LE: xxmrghd +; P9LE: xvcvdpuxds v2 +; P9LE-NEXT: blr +; P8BE: xxmrghd +; P8BE: xvcvdpuxds v2 +; P8BE-NEXT: blr +; P8LE: xxmrghd +; P8LE: xvcvdpuxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @fromDiffConstsConvftoull() { +entry: + ret <2 x 
i64> +; P9BE-LABEL: fromDiffConstsConvftoull +; P9LE-LABEL: fromDiffConstsConvftoull +; P8BE-LABEL: fromDiffConstsConvftoull +; P8LE-LABEL: fromDiffConstsConvftoull +; P9BE: lxvx v2 +; P9BE: blr +; P9LE: lxvx v2 +; P9LE: blr +; P8BE: lxvd2x v2 +; P8BE: blr +; P8LE: lxvd2x +; P8LE: xxswapd v2 +; P8LE: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsAConvftoull(float* nocapture readonly %ptr) { +entry: + %0 = load float, float* %ptr, align 4 + %conv = fptoui float %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1 + %1 = load float, float* %arrayidx1, align 4 + %conv2 = fptoui float %1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +; P9BE-LABEL: fromDiffMemConsAConvftoull +; P9LE-LABEL: fromDiffMemConsAConvftoull +; P8BE-LABEL: fromDiffMemConsAConvftoull +; P8LE-LABEL: fromDiffMemConsAConvftoull +; P9BE: lfs +; P9BE: lfs +; P9BE: xxmrghd +; P9BE-NEXT: xvcvdpuxds v2 +; P9BE-NEXT: blr +; P9LE: lfs +; P9LE: lfs +; P9LE: xxmrghd +; P9LE-NEXT: xvcvdpuxds v2 +; P9LE-NEXT: blr +; P8BE: lxsspx +; P8BE: lxsspx +; P8BE: xxmrghd +; P8BE-NEXT: xvcvdpuxds v2 +; P8BE-NEXT: blr +; P8LE: lxsspx +; P8LE: lxsspx +; P8LE: xxmrghd +; P8LE-NEXT: xvcvdpuxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemConsDConvftoull(float* nocapture readonly %ptr) { +entry: + %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 + %0 = load float, float* %arrayidx, align 4 + %conv = fptoui float %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2 + %1 = load float, float* %arrayidx1, align 4 + %conv2 = fptoui float %1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +; P9BE-LABEL: fromDiffMemConsDConvftoull +; P9LE-LABEL: fromDiffMemConsDConvftoull +; P8BE-LABEL: fromDiffMemConsDConvftoull +; P8LE-LABEL: fromDiffMemConsDConvftoull +; P9BE: lfs +; P9BE: lfs +; P9BE: xxmrghd +; P9BE-NEXT: xvcvdpuxds v2 +; P9BE-NEXT: blr +; P9LE: lfs +; P9LE: lfs +; P9LE: xxmrghd +; P9LE-NEXT: xvcvdpuxds v2 +; P9LE-NEXT: blr +; P8BE: lxsspx +; P8BE: lxsspx +; P8BE: xxmrghd +; P8BE-NEXT: xvcvdpuxds v2 +; P8BE-NEXT: blr +; P8LE: lxsspx +; P8LE: lxsspx +; P8LE: xxmrghd +; P8LE-NEXT: xvcvdpuxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarAConvftoull(float* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fptoui float %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %add = add nsw i32 %elem, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 + %1 = load float, float* %arrayidx2, align 4 + %conv3 = fptoui float %1 to i64 + %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 + ret <2 x i64> %vecinit4 +; P9BE-LABEL: fromDiffMemVarAConvftoull +; P9LE-LABEL: fromDiffMemVarAConvftoull +; P8BE-LABEL: fromDiffMemVarAConvftoull +; P8LE-LABEL: fromDiffMemVarAConvftoull +; P9BE: sldi +; P9BE: lfsux +; P9BE: lfs +; P9BE: xxmrghd +; P9BE-NEXT: xvcvdpuxds v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lfsux +; P9LE: lfs +; P9LE: xxmrghd +; P9LE-NEXT: xvcvdpuxds v2 +; P9LE-NEXT: blr +; 
P8BE: sldi +; P8BE: lfsux +; P8BE: lxsspx +; P8BE: xxmrghd +; P8BE-NEXT: xvcvdpuxds v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lfsux +; P8LE: lxsspx +; P8LE: xxmrghd +; P8LE-NEXT: xvcvdpuxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @fromDiffMemVarDConvftoull(float* nocapture readonly %arr, i32 signext %elem) { +entry: + %idxprom = sext i32 %elem to i64 + %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fptoui float %0 to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %sub = add nsw i32 %elem, -1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 + %1 = load float, float* %arrayidx2, align 4 + %conv3 = fptoui float %1 to i64 + %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 + ret <2 x i64> %vecinit4 +; P9BE-LABEL: fromDiffMemVarDConvftoull +; P9LE-LABEL: fromDiffMemVarDConvftoull +; P8BE-LABEL: fromDiffMemVarDConvftoull +; P8LE-LABEL: fromDiffMemVarDConvftoull +; P9BE: sldi +; P9BE: lfsux +; P9BE: lfs +; P9BE: xxmrghd +; P9BE-NEXT: xvcvdpuxds v2 +; P9BE-NEXT: blr +; P9LE: sldi +; P9LE: lfsux +; P9LE: lfs +; P9LE: xxmrghd +; P9LE-NEXT: xvcvdpuxds v2 +; P9LE-NEXT: blr +; P8BE: sldi +; P8BE: lfsux +; P8BE: lxsspx +; P8BE: xxmrghd +; P8BE-NEXT: xvcvdpuxds v2 +; P8BE-NEXT: blr +; P8LE: sldi +; P8LE: lfsux +; P8LE: lxsspx +; P8LE: xxmrghd +; P8LE-NEXT: xvcvdpuxds v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltRegValConvftoull(float %val) { +entry: + %conv = fptoui float %val to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltRegValConvftoull +; P9LE-LABEL: spltRegValConvftoull +; P8BE-LABEL: spltRegValConvftoull +; P8LE-LABEL: spltRegValConvftoull +; P9BE: xscvdpuxds +; P9BE-NEXT: xxspltd v2 +; P9BE-NEXT: blr +; P9LE: xscvdpuxds +; P9LE-NEXT: xxspltd v2 +; P9LE-NEXT: blr +; P8BE: xscvdpuxds +; P8BE-NEXT: xxspltd v2 +; P8BE-NEXT: blr +; P8LE: xscvdpuxds +; P8LE-NEXT: xxspltd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) { +entry: + %0 = load float, float* %ptr, align 4 + %conv = fptoui float %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +; P9BE-LABEL: spltMemValConvftoull +; P9LE-LABEL: spltMemValConvftoull +; P8BE-LABEL: spltMemValConvftoull +; P8LE-LABEL: spltMemValConvftoull +; P9BE: lfs +; P9BE-NEXT: xscvdpuxds +; P9BE-NEXT: xxspltd v2 +; P9BE-NEXT: blr +; P9LE: lfs +; P9LE-NEXT: xscvdpuxds +; P9LE-NEXT: xxspltd v2 +; P9LE-NEXT: blr +; P8BE: lxsspx +; P8BE-NEXT: xscvdpuxds +; P8BE-NEXT: xxspltd v2 +; P8BE-NEXT: blr +; P8LE: lxsspx +; P8LE-NEXT: xscvdpuxds +; P8LE-NEXT: xxspltd v2 +; P8LE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define <2 x i64> @spltCnstConvdtoull() { +entry: + ret <2 x i64> +; P9BE-LABEL: spltCnstConvdtoull +; P9LE-LABEL: spltCnstConvdtoull +; P8BE-LABEL: spltCnstConvdtoull +; P8LE-LABEL: spltCnstConvdtoull +; P9BE: lxvx +; P9BE: blr +; P9LE: lxvx +; P9LE: blr +; P8BE: lxvd2x +; P8BE: blr +; P8LE: lxvd2x +; P8LE: blr +} + +; Function Attrs: norecurse 
+; Function Attrs: norecurse nounwind readnone
+define <2 x i64> @spltRegValConvftoull(float %val) {
+entry:
+  %conv = fptoui float %val to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; P9BE-LABEL: spltRegValConvftoull
+; P9LE-LABEL: spltRegValConvftoull
+; P8BE-LABEL: spltRegValConvftoull
+; P8LE-LABEL: spltRegValConvftoull
+; P9BE: xscvdpuxds
+; P9BE-NEXT: xxspltd v2
+; P9BE-NEXT: blr
+; P9LE: xscvdpuxds
+; P9LE-NEXT: xxspltd v2
+; P9LE-NEXT: blr
+; P8BE: xscvdpuxds
+; P8BE-NEXT: xxspltd v2
+; P8BE-NEXT: blr
+; P8LE: xscvdpuxds
+; P8LE-NEXT: xxspltd v2
+; P8LE-NEXT: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) {
+entry:
+  %0 = load float, float* %ptr, align 4
+  %conv = fptoui float %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; P9BE-LABEL: spltMemValConvftoull
+; P9LE-LABEL: spltMemValConvftoull
+; P8BE-LABEL: spltMemValConvftoull
+; P8LE-LABEL: spltMemValConvftoull
+; P9BE: lfs
+; P9BE-NEXT: xscvdpuxds
+; P9BE-NEXT: xxspltd v2
+; P9BE-NEXT: blr
+; P9LE: lfs
+; P9LE-NEXT: xscvdpuxds
+; P9LE-NEXT: xxspltd v2
+; P9LE-NEXT: blr
+; P8BE: lxsspx
+; P8BE-NEXT: xscvdpuxds
+; P8BE-NEXT: xxspltd v2
+; P8BE-NEXT: blr
+; P8LE: lxsspx
+; P8LE-NEXT: xscvdpuxds
+; P8LE-NEXT: xxspltd v2
+; P8LE-NEXT: blr
+}
+
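+; The remaining tests repeat the same build-vector patterns for doubles
+; converted to unsigned long long (fptoui double to i64).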
+; Function Attrs: norecurse nounwind readnone
+define <2 x i64> @spltCnstConvdtoull() {
+entry:
+  ret <2 x i64>
+; P9BE-LABEL: spltCnstConvdtoull
+; P9LE-LABEL: spltCnstConvdtoull
+; P8BE-LABEL: spltCnstConvdtoull
+; P8LE-LABEL: spltCnstConvdtoull
+; P9BE: lxvx
+; P9BE: blr
+; P9LE: lxvx
+; P9LE: blr
+; P8BE: lxvd2x
+; P8BE: blr
+; P8LE: lxvd2x
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <2 x i64> @fromRegsConvdtoull(double %a, double %b) {
+entry:
+  %conv = fptoui double %a to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %conv1 = fptoui double %b to i64
+  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
+  ret <2 x i64> %vecinit2
+; P9BE-LABEL: fromRegsConvdtoull
+; P9LE-LABEL: fromRegsConvdtoull
+; P8BE-LABEL: fromRegsConvdtoull
+; P8LE-LABEL: fromRegsConvdtoull
+; P9BE: xxmrghd
+; P9BE-NEXT: xvcvdpuxds
+; P9BE-NEXT: blr
+; P9LE: xxmrghd
+; P9LE-NEXT: xvcvdpuxds
+; P9LE-NEXT: blr
+; P8BE: xxmrghd
+; P8BE-NEXT: xvcvdpuxds
+; P8BE-NEXT: blr
+; P8LE: xxmrghd
+; P8LE-NEXT: xvcvdpuxds
+; P8LE-NEXT: blr
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <2 x i64> @fromDiffConstsConvdtoull() {
+entry:
+  ret <2 x i64>
+; P9BE-LABEL: fromDiffConstsConvdtoull
+; P9LE-LABEL: fromDiffConstsConvdtoull
+; P8BE-LABEL: fromDiffConstsConvdtoull
+; P8LE-LABEL: fromDiffConstsConvdtoull
+; P9BE: lxvx
+; P9BE: blr
+; P9LE: lxvx
+; P9LE: blr
+; P8BE: lxvd2x
+; P8BE: blr
+; P8LE: lxvd2x
+; P8LE: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @fromDiffMemConsAConvdtoull(double* nocapture readonly %ptr) {
+entry:
+  %0 = bitcast double* %ptr to <2 x double>*
+  %1 = load <2 x double>, <2 x double>* %0, align 8
+  %2 = fptoui <2 x double> %1 to <2 x i64>
+  ret <2 x i64> %2
+; P9BE-LABEL: fromDiffMemConsAConvdtoull
+; P9LE-LABEL: fromDiffMemConsAConvdtoull
+; P8BE-LABEL: fromDiffMemConsAConvdtoull
+; P8LE-LABEL: fromDiffMemConsAConvdtoull
+; P9BE: lxvx
+; P9BE-NEXT: xvcvdpuxds v2
+; P9BE-NEXT: blr
+; P9LE: lxvx
+; P9LE-NEXT: xvcvdpuxds v2
+; P9LE-NEXT: blr
+; P8BE: lxvd2x
+; P8BE-NEXT: xvcvdpuxds v2
+; P8BE-NEXT: blr
+; P8LE: lxvd2x
+; P8LE: xxswapd
+; P8LE-NEXT: xvcvdpuxds v2
+; P8LE-NEXT: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @fromDiffMemConsDConvdtoull(double* nocapture readonly %ptr) {
+entry:
+  %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
+  %0 = load double, double* %arrayidx, align 8
+  %conv = fptoui double %0 to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
+  %1 = load double, double* %arrayidx1, align 8
+  %conv2 = fptoui double %1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+; P9BE-LABEL: fromDiffMemConsDConvdtoull
+; P9LE-LABEL: fromDiffMemConsDConvdtoull
+; P8BE-LABEL: fromDiffMemConsDConvdtoull
+; P8LE-LABEL: fromDiffMemConsDConvdtoull
+; P9BE: lxvx
+; P9BE-NEXT: xxswapd
+; P9BE-NEXT: xvcvdpuxds v2
+; P9BE-NEXT: blr
+; P9LE: lxvx
+; P9LE-NEXT: xxswapd
+; P9LE-NEXT: xvcvdpuxds v2
+; P9LE-NEXT: blr
+; P8BE: lxvd2x
+; P8BE-NEXT: xxswapd
+; P8BE-NEXT: xvcvdpuxds v2
+; P8BE-NEXT: blr
+; P8LE: lxvd2x
+; P8LE-NEXT: xvcvdpuxds v2
+; P8LE-NEXT: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @fromDiffMemVarAConvdtoull(double* nocapture readonly %arr, i32 signext %elem) {
+entry:
+  %idxprom = sext i32 %elem to i64
+  %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
+  %0 = load double, double* %arrayidx, align 8
+  %conv = fptoui double %0 to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %add = add nsw i32 %elem, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
+  %1 = load double, double* %arrayidx2, align 8
+  %conv3 = fptoui double %1 to i64
+  %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
+  ret <2 x i64> %vecinit4
+; P9BE-LABEL: fromDiffMemVarAConvdtoull
+; P9LE-LABEL: fromDiffMemVarAConvdtoull
+; P8BE-LABEL: fromDiffMemVarAConvdtoull
+; P8LE-LABEL: fromDiffMemVarAConvdtoull
+; P9BE: sldi
+; P9BE: lxvx
+; P9BE-NEXT: xvcvdpuxds v2
+; P9BE-NEXT: blr
+; P9LE: sldi
+; P9LE: lxvx
+; P9LE-NEXT: xvcvdpuxds v2
+; P9LE-NEXT: blr
+; P8BE: sldi
+; P8BE: lxvd2x
+; P8BE-NEXT: xvcvdpuxds v2
+; P8BE-NEXT: blr
+; P8LE: sldi
+; P8LE: lxvd2x
+; P8LE-NEXT: xxswapd
+; P8LE-NEXT: xvcvdpuxds v2
+; P8LE-NEXT: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @fromDiffMemVarDConvdtoull(double* nocapture readonly %arr, i32 signext %elem) {
+entry:
+  %idxprom = sext i32 %elem to i64
+  %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
+  %0 = load double, double* %arrayidx, align 8
+  %conv = fptoui double %0 to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %sub = add nsw i32 %elem, -1
+  %idxprom1 = sext i32 %sub to i64
+  %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
+  %1 = load double, double* %arrayidx2, align 8
+  %conv3 = fptoui double %1 to i64
+  %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
+  ret <2 x i64> %vecinit4
+; P9BE-LABEL: fromDiffMemVarDConvdtoull
+; P9LE-LABEL: fromDiffMemVarDConvdtoull
+; P8BE-LABEL: fromDiffMemVarDConvdtoull
+; P8LE-LABEL: fromDiffMemVarDConvdtoull
+; P9BE: sldi
+; P9BE: lxvx
+; P9BE-NEXT: xxswapd
+; P9BE-NEXT: xvcvdpuxds v2
+; P9BE-NEXT: blr
+; P9LE: sldi
+; P9LE: lxvx
+; P9LE-NEXT: xxswapd
+; P9LE-NEXT: xvcvdpuxds v2
+; P9LE-NEXT: blr
+; P8BE: sldi
+; P8BE: lxvd2x
+; P8BE-NEXT: xxswapd
+; P8BE-NEXT: xvcvdpuxds v2
+; P8BE-NEXT: blr
+; P8LE: sldi
+; P8LE: lxvd2x
+; P8LE-NEXT: xvcvdpuxds v2
+; P8LE-NEXT: blr
+}
+
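+; As with the float variants, a splat of a converted double should be a
+; single xscvdpuxds followed by xxspltd; a splatted load should be a
+; load-and-splat (lxvdsx) followed by one vector conversion.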
+; Function Attrs: norecurse nounwind readnone
+define <2 x i64> @spltRegValConvdtoull(double %val) {
+entry:
+  %conv = fptoui double %val to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; P9BE-LABEL: spltRegValConvdtoull
+; P9LE-LABEL: spltRegValConvdtoull
+; P8BE-LABEL: spltRegValConvdtoull
+; P8LE-LABEL: spltRegValConvdtoull
+; P9BE: xscvdpuxds
+; P9BE-NEXT: xxspltd v2
+; P9BE-NEXT: blr
+; P9LE: xscvdpuxds
+; P9LE-NEXT: xxspltd v2
+; P9LE-NEXT: blr
+; P8BE: xscvdpuxds
+; P8BE-NEXT: xxspltd v2
+; P8BE-NEXT: blr
+; P8LE: xscvdpuxds
+; P8LE-NEXT: xxspltd v2
+; P8LE-NEXT: blr
+}
+
+; Function Attrs: norecurse nounwind readonly
+define <2 x i64> @spltMemValConvdtoull(double* nocapture readonly %ptr) {
+entry:
+  %0 = load double, double* %ptr, align 8
+  %conv = fptoui double %0 to i64
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; P9BE-LABEL: spltMemValConvdtoull
+; P9LE-LABEL: spltMemValConvdtoull
+; P8BE-LABEL: spltMemValConvdtoull
+; P8LE-LABEL: spltMemValConvdtoull
+; P9BE: lxvdsx
+; P9BE-NEXT: xvcvdpuxds
+; P9BE-NEXT: blr
+; P9LE: lxvdsx
+; P9LE-NEXT: xvcvdpuxds
+; P9LE-NEXT: blr
+; P8BE: lxvdsx
+; P8BE-NEXT: xvcvdpuxds
+; P8BE-NEXT: blr
+; P8LE: lxvdsx
+; P8LE-NEXT: xvcvdpuxds
+; P8LE-NEXT: blr
+}
Index: test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
===================================================================
--- test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -46,10 +46,10 @@
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; CHECK: sldi [[REG1:[0-9]+]], 3, 32
-; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
-; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+; CHECK: mtvsrwz [[REG1:[0-9]+]], 3
+; CHECK: xxspltw 34, [[REG1]]
+; CHECK-LE: mtvsrwz [[REG1:[0-9]+]], 3
+; CHECK-LE: xxspltw 34, [[REG1]]
 }
 
 ; Function Attrs: nounwind
Index: test/CodeGen/PowerPC/power9-moves-and-splats.ll
===================================================================
--- test/CodeGen/PowerPC/power9-moves-and-splats.ll
+++ test/CodeGen/PowerPC/power9-moves-and-splats.ll
@@ -10,15 +10,7 @@
-; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
-; which will happen in a subsequent patch.
 ; CHECK-LABEL: test1
-; FIXME: mtvsrdd 34, 4, 3
-; CHECK: mtvsrd {{[0-9]+}}, 3
-; CHECK: mtvsrd {{[0-9]+}}, 4
-; CHECK: xxmrgld
+; CHECK: mtvsrdd 34, 4, 3
 ; CHECK-BE-LABEL: test1
-; FIXME-BE: mtvsrdd 34, 3, 4
-; CHECK-BE: mtvsrd {{[0-9]+}}, 4
-; CHECK-BE: mtvsrd {{[0-9]+}}, 3
-; CHECK-BE: xxmrghd
+; CHECK-BE: mtvsrdd 34, 3, 4
   %vecins = insertelement <2 x i64> undef, i64 %a, i32 0
   %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
   ret <2 x i64> %vecins1
Index: test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll
===================================================================
--- test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll
+++ test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll
@@ -5,7 +5,7 @@
 ; Check that the conditional return block of fmax_double3.exit was not
 ; duplicated into the if.then.i block
 ; CHECK: # %if.then.i
-; CHECK: lxvd2x
+; CHECK: xxlxor
 ; CHECK: stxvd2x
 ; CHECK-NOT: bclr
 ; CHECK: {{^}}.LBB{{[0-9_]+}}:
Index: test/CodeGen/PowerPC/vsx.ll
===================================================================
--- test/CodeGen/PowerPC/vsx.ll
+++ test/CodeGen/PowerPC/vsx.ll
@@ -1087,9 +1087,7 @@
 ; CHECK-LE: mtvsrwa
 ; CHECK-LE: xscvsxddp
 ; CHECK-LE: xscvsxddp
-; CHECK-LE: xxspltd
-; CHECK-LE: xxspltd
-; CHECK-LE: xxmrgld
+; CHECK-LE: xxmrghd
 ; CHECK-LE: blr
 }
 
@@ -1112,9 +1110,7 @@
 ; CHECK-LE: mtvsrwa
 ; CHECK-LE: xscvsxddp
 ; CHECK-LE: xscvsxddp
-; CHECK-LE: xxspltd
-; CHECK-LE: xxspltd
-; CHECK-LE: xxmrgld
+; CHECK-LE: xxmrghd
 ; CHECK-LE: blr
 }