Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
@@ -71,6 +71,11 @@
       /// unsigned integers with round toward zero.
       FCTIDUZ, FCTIWUZ,
 
+      /// Floating-point-to-integer conversion instructions that leave the
+      /// converted integer in a VSR. The result is typed as a floating-point
+      /// value and is consumed by ST_VSR_SCAL_INT.
+      FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR,
+
       /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
       /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.
       VEXTS,
@@ -426,6 +431,11 @@
       /// an xxswapd.
       STXVD2X,
 
+      /// CHAIN = ST_VSR_SCAL_INT CHAIN, Val, Ptr, ByteWidth, VT - Store a
+      /// scalar integer of ByteWidth bytes held in a VSR (Val, produced by
+      /// FP_TO_[US]INT_IN_VSR) to Ptr.
+      ST_VSR_SCAL_INT,
+
       /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
       /// The 4xf32 load used for v4i1 constants.
       QVLFSb,
@@ -1063,6 +1073,7 @@
     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1158,6 +1158,10 @@
   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
+  case PPCISD::FP_TO_UINT_IN_VSR:
+                                return "PPCISD::FP_TO_UINT_IN_VSR";
+  case PPCISD::FP_TO_SINT_IN_VSR:
+                                return "PPCISD::FP_TO_SINT_IN_VSR";
   case PPCISD::FRE:             return "PPCISD::FRE";
   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
@@ -1211,6 +1215,8 @@
   case PPCISD::SExtVElems:      return "PPCISD::SExtVElems";
   case
PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
+  case PPCISD::ST_VSR_SCAL_INT:
+                                return "PPCISD::ST_VSR_SCAL_INT";
   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
   case PPCISD::BDZ:             return "PPCISD::BDZ";
@@ -12224,6 +12230,79 @@
   return Store;
 }
 
+/// Handle DAG combine for STORE (FP_TO_[SU]INT F): rewrite it as
+/// ST_VSR_SCAL_INT (FP_TO_[SU]INT_IN_VSR F) so that the converted integer is
+/// stored directly from the VSR that holds it.
+///
+/// \param N   The store node; operand 1 must be an ISD::FP_TO_SINT or
+///            ISD::FP_TO_UINT node (asserted).
+/// \param DCI Combiner state; every node created here is added to its
+///            worklist.
+/// \returns the new ST_VSR_SCAL_INT node, or an empty SDValue when the
+///          combine does not apply and \p N is left untouched.
+SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
+                                               DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+  SDValue Conv = N->getOperand(1);
+  unsigned Opcode = Conv.getOpcode();
+
+  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
+         && "Not a FP_TO_INT Instruction!");
+
+  SDValue Val = Conv.getOperand(0);
+  EVT Op1VT = Conv.getValueType();
+  EVT ResVT = Val.getValueType();
+
+  // Floating point types smaller than 32 bits are not legal on Power.
+  if (ResVT.getScalarSizeInBits() < 32)
+    return SDValue();
+
+  // The conversion/store patterns only accept an f64 source (f32 is extended
+  // below), so there is nothing to select for f128 or ppcf128 inputs.
+  if (ResVT == MVT::f128 || ResVT == MVT::ppcf128)
+    return SDValue();
+
+  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
+  bool ValidTypeForStoreFltAsInt =
+      (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
+       (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
+
+  // The patterns that select ST_VSR_SCAL_INT are guarded by HasP8Vector, so
+  // the combine must require the same feature to stay selectable.
+  if (!Subtarget.hasP8Vector() ||
+      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
+    return SDValue();
+
+  // Extend f32 values to f64
+  if (ResVT.getScalarSizeInBits() == 32) {
+    Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+    DCI.AddToWorklist(Val.getNode());
+  }
+
+  // Set signed or unsigned conversion opcode.
+  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
+                          PPCISD::FP_TO_SINT_IN_VSR :
+                          PPCISD::FP_TO_UINT_IN_VSR;
+
+  Val = DAG.getNode(ConvOpcode, dl, MVT::f64, Val);
+  DCI.AddToWorklist(Val.getNode());
+
+  // Set number of bytes being converted.
+  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
+  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
+                    DAG.getIntPtrConstant(ByteSize, dl, false),
+                    DAG.getValueType(Op1VT) };
+
+  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
+                                DAG.getVTList(MVT::Other), Ops,
+                                cast<StoreSDNode>(N)->getMemoryVT(),
+                                cast<StoreSDNode>(N)->getMemOperand());
+
+  DCI.AddToWorklist(Val.getNode());
+  return Val;
+}
+
 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -12263,60 +12342,22 @@
   case ISD::UINT_TO_FP:
     return combineFPToIntToFP(N, DCI);
   case ISD::STORE: {
-    EVT Op1VT = N->getOperand(1).getValueType();
-    bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
-      (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
-    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
-    if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
-        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
-        ValidTypeForStoreFltAsInt &&
-        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
-      SDValue Val = N->getOperand(1).getOperand(0);
-      if (Val.getValueType() == MVT::f32) {
-        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
-        DCI.AddToWorklist(Val.getNode());
-      }
-      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
-      DCI.AddToWorklist(Val.getNode());
-
-      if (Op1VT == MVT::i32) {
-        SDValue Ops[] = {
-          N->getOperand(0), Val, N->getOperand(2),
-          DAG.getValueType(N->getOperand(1).getValueType())
-        };
-
-        Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
-                DAG.getVTList(MVT::Other), Ops,
-                cast<StoreSDNode>(N)->getMemoryVT(),
-                cast<StoreSDNode>(N)->getMemOperand());
-      } else {
-        unsigned WidthInBytes =
-          N->getOperand(1).getValueType() == MVT::i8 ?
1 : 2; - SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false); - - SDValue Ops[] = { - N->getOperand(0), Val, N->getOperand(2), WidthConst, - DAG.getValueType(N->getOperand(1).getValueType()) - }; - Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl, - DAG.getVTList(MVT::Other), Ops, - cast(N)->getMemoryVT(), - cast(N)->getMemOperand()); - } + EVT Op1VT = N->getOperand(1).getValueType(); + unsigned Opcode = N->getOperand(1).getOpcode(); - DCI.AddToWorklist(Val.getNode()); - return Val; + if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) { + SDValue Val= combineStoreFPToInt(N, DCI); + if (Val) + return Val; } // Turn STORE (BSWAP) -> sthbrx/stwbrx. - if (cast(N)->isUnindexed() && - N->getOperand(1).getOpcode() == ISD::BSWAP && + if (cast(N)->isUnindexed() && Opcode == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && - (N->getOperand(1).getValueType() == MVT::i32 || - N->getOperand(1).getValueType() == MVT::i16 || - (Subtarget.hasLDBRX() && Subtarget.isPPC64() && - N->getOperand(1).getValueType() == MVT::i64))) { + (Op1VT == MVT::i32 || Op1VT == MVT::i16 || + (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) { + // STBRX can only handle simple types. EVT mVT = cast(N)->getMemoryVT(); if (mVT.isExtended()) @@ -12349,9 +12375,8 @@ // STORE Constant:i32<0> -> STORE Constant:i64<0> // So it can increase the chance of CSE constant construction. - EVT VT = N->getOperand(1).getValueType(); if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() && - isa(N->getOperand(1)) && VT == MVT::i32) { + isa(N->getOperand(1)) && Op1VT == MVT::i32) { // Need to sign-extended to 64-bits to handle negative values. EVT MemVT = cast(N)->getMemoryVT(); uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1), @@ -12369,8 +12394,8 @@ // For little endian, VSX stores require generating xxswapd/lxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
- if (VT.isSimple()) { - MVT StoreVT = VT.getSimpleVT(); + if (Op1VT.isSimple()) { + MVT StoreVT = Op1VT.getSimpleVT(); if (Subtarget.needsSwapsForVSXMemOps() && (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td @@ -29,6 +29,12 @@ def SDT_PPCstxsix : SDTypeProfile<0, 3, [ SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> ]>; +def SDT_PPCcv_fp_to_int : SDTypeProfile<1, 1, [ + SDTCisFP<0>, SDTCisFP<1> + ]>; +def SDT_PPCstore_scal_int_from_vsr : SDTypeProfile<0, 3, [ + SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> +]>; def SDT_PPCVexts : SDTypeProfile<1, 2, [ SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2> ]>; @@ -123,6 +129,14 @@ def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; + +def PPCcv_fp_to_uint_in_vsr: + SDNode<"PPCISD::FP_TO_UINT_IN_VSR", SDT_PPCcv_fp_to_int, []>; +def PPCcv_fp_to_sint_in_vsr: + SDNode<"PPCISD::FP_TO_SINT_IN_VSR", SDT_PPCcv_fp_to_int, []>; +def PPCstore_scal_int_from_vsr: + SDNode<"PPCISD::ST_VSR_SCAL_INT", SDT_PPCstore_scal_int_from_vsr, + [SDNPHasChain, SDNPMayStore]>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore]>; def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, @@ -1998,7 +2012,7 @@ "stwx $rS, $dst", IIC_LdStStore, [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; - + def STHBRX: XForm_8_memOp<31, 918, (outs), (ins gprc:$rS, memrr:$dst), "sthbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, @@ -2011,7 +2025,7 @@ def STFIWX: XForm_28_memOp<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), "stfiwx $frS, $dst", IIC_LdStSTFD, [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; - + def 
STFSX : XForm_28_memOp<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), "stfsx $frS, $dst", IIC_LdStSTFD, [(store f32:$frS, xaddr:$dst)]>; Index: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td @@ -1200,6 +1200,7 @@ */ def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; +def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; let Predicates = [HasP8Vector] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. let isCommutable = 1, UseVSXReg = 1 in { @@ -1464,6 +1465,25 @@ } def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)), (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>; + + // Instructions for converting float to i64 feeding a store. + let Predicates = [NoP9Vector] in { + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>; + } + + // Instructions for converting float to i32 feeding a store. + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4), + (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4), + (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + } // AddedComplexity = 400 } // HasP8Vector @@ -3150,8 +3170,36 @@ def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)), (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>; -} // end HasP9Vector, AddedComplexity + // Instructions for fptosint (i64,i16,i8) feeding a store. + // The 8-byte version is repeated here due to availability of D-Form STXSD. 
+ def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddr:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8), + (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2), + (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1), + (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + + // Instructions for fptouint (i64,i16,i8) feeding a store. + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddr:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8), + (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2), + (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1), + (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; +} // end HasP9Vector, AddedComplexity let Predicates = [HasP9Vector] in { let isPseudo = 1 in { let mayStore = 1 in { @@ -3305,7 +3353,6 @@ } // Patterns for BUILD_VECTOR nodes. 
-def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; let AddedComplexity = 400 in { let Predicates = [HasVSX] in { Index: llvm/trunk/test/CodeGen/PowerPC/store_fptoi.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/store_fptoi.ll +++ llvm/trunk/test/CodeGen/PowerPC/store_fptoi.ll @@ -0,0 +1,772 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -ppc-vsr-nums-as-vr \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -ppc-vsr-nums-as-vr \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-PWR8 %s + +; ========================================== +; Tests for store of fp_to_sint conversions +; ========================================== + +; Function Attrs: norecurse nounwind +define void @dpConv2sdw(double* nocapture readonly %a, i64* nocapture %b) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptosi double %0 to i64 + store i64 %conv, i64* %b, align 8 + ret void + +; CHECK-LABEL: dpConv2sdw +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK: xscvdpsxds [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2sdw +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stxsdx [[CONV]], 0, 4 +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @dpConv2sw(double* nocapture readonly %a, i32* nocapture %b) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptosi double %0 to i32 + store i32 %conv, i32* %b, align 4 + ret void + +; CHECK-LABEL: dpConv2sw +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stfiwx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2sw +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stfiwx [[CONV]], 0, 4 +; 
CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @dpConv2shw(double* nocapture readonly %a, i16* nocapture %b) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptosi double %0 to i16 + store i16 %conv, i16* %b, align 2 + ret void + +; CHECK-LABEL: dpConv2shw +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsihx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2shw +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: sth [[REG]], 0(4) +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @dpConv2sb(double* nocapture readonly %a, i8* nocapture %b) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptosi double %0 to i8 + store i8 %conv, i8* %b, align 1 + ret void + +; CHECK-LABEL: dpConv2sb +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsibx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2sb +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: stb [[REG]], 0(4) +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @spConv2sdw(float* nocapture readonly %a, i64* nocapture %b) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptosi float %0 to i64 + store i64 %conv, i64* %b, align 8 + ret void + +; CHECK-LABEL: spConv2sdw +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2sdw +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stxsdx [[CONV]], 0, 4 +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind 
+define void @spConv2sw(float* nocapture readonly %a, i32* nocapture %b) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptosi float %0 to i32 + store i32 %conv, i32* %b, align 4 + ret void + +; CHECK-LABEL: spConv2sw +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stfiwx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2sw +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stfiwx [[CONV]], 0, 4 +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @spConv2shw(float* nocapture readonly %a, i16* nocapture %b) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptosi float %0 to i16 + store i16 %conv, i16* %b, align 2 + ret void + +; CHECK-LABEL: spConv2shw +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsihx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2shw +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: sth [[REG]], 0(4) +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @spConv2sb(float* nocapture readonly %a, i8* nocapture %b) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptosi float %0 to i8 + store i8 %conv, i8* %b, align 1 + ret void + +; CHECK-LABEL: spConv2sb +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsibx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2sb +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: stb [[REG]], 0(4) +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @dpConv2sdw_x(double* nocapture readonly %a, i64* nocapture %b, + i32 
signext %idx) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptosi double %0 to i64 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds i64, i64* %b, i64 %idxprom + store i64 %conv, i64* %arrayidx, align 8 + ret void + +; CHECK-LABEL: dpConv2sdw_x +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3 +; CHECK-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2sdw_x +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8: sldi [[REG:[0-9]+]], 5, 3 +; CHECK-PWR8-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stxsdx [[CONV]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @dpConv2sw_x(double* nocapture readonly %a, i32* nocapture %b, + i32 signext %idx) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptosi double %0 to i32 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom + store i32 %conv, i32* %arrayidx, align 4 + ret void + +; CHECK-LABEL: dpConv2sw_x +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2 +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2sw_x +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 2 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stfiwx [[CONV]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @dpConv2shw_x(double* nocapture readonly %a, i16* nocapture %b, + i32 signext %idx) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptosi double %0 to i16 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds i16, i16* %b, i64 %idxprom + store i16 %conv, i16* %arrayidx, align 2 + ret void + +; CHECK-LABEL: dpConv2shw_x +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; 
CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 1 +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2shw_x +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 1 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: sthx [[REG]], 4, 5 +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @dpConv2sb_x(double* nocapture readonly %a, i8* nocapture %b, + i32 signext %idx) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptosi double %0 to i8 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds i8, i8* %b, i64 %idxprom + store i8 %conv, i8* %arrayidx, align 1 + ret void + +; CHECK-LABEL: dpConv2sb_x +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsibx [[CONV]], 4, 5 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2sb_x +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: stbx [[REG]], 4, 5 +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @spConv2sdw_x(float* nocapture readonly %a, i64* nocapture %b, + i32 signext %idx) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptosi float %0 to i64 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds i64, i64* %b, i64 %idxprom + store i64 %conv, i64* %arrayidx, align 8 + ret void + +; CHECK-LABEL: spConv2sdw_x +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3 +; CHECK-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2sdw_x +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 3 +; CHECK-PWR8-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] 
+; CHECK-PWR8-NEXT: stxsdx [[CONV]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @spConv2sw_x(float* nocapture readonly %a, i32* nocapture %b, + i32 signext %idx) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptosi float %0 to i32 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom + store i32 %conv, i32* %arrayidx, align 4 + ret void + +; CHECK-LABEL: spConv2sw_x +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2 +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2sw_x +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 2 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stfiwx [[CONV]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @spConv2shw_x(float* nocapture readonly %a, i16* nocapture %b, + i32 signext %idx) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptosi float %0 to i16 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds i16, i16* %b, i64 %idxprom + store i16 %conv, i16* %arrayidx, align 2 + ret void + +; CHECK-LABEL: spConv2shw_x +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK: sldi [[REG:[0-9]+]], 5, 1 +; CHECK: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2shw_x +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 1 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG2:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: sthx [[REG2]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @spConv2sb_x(float* nocapture readonly %a, i8* nocapture %b, + i32 signext %idx) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptosi float 
%0 to i8 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds i8, i8* %b, i64 %idxprom + store i8 %conv, i8* %arrayidx, align 1 + ret void + +; CHECK-LABEL: spConv2sb_x +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsibx [[CONV]], 4, 5 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2sb_x +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: stbx [[REG]], 4, 5 +; CHECK-PWR8-NEXT: blr +} + +; ========================================== +; Tests for store of fp_to_uint conversions +; ========================================== + +; Function Attrs: norecurse nounwind +define void @dpConv2udw(double* nocapture readonly %a, i64* nocapture %b) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptoui double %0 to i64 + store i64 %conv, i64* %b, align 8 + ret void + +; CHECK-LABEL: dpConv2udw +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK: xscvdpuxds [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2udw +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stxsdx [[CONV]], 0, 4 +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void @dpConv2uw(double* nocapture readonly %a, i32* nocapture %b) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptoui double %0 to i32 + store i32 %conv, i32* %b, align 4 + ret void + +; CHECK-LABEL: dpConv2uw +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stfiwx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2uw +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stfiwx [[CONV]], 0, 4 +; CHECK-PWR8-NEXT: blr +} + +; Function Attrs: norecurse nounwind +define void 
@dpConv2uhw(double* nocapture readonly %a, i16* nocapture %b) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptoui double %0 to i16 + store i16 %conv, i16* %b, align 2 + ret void + +; CHECK-LABEL: dpConv2uhw +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsihx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2uhw +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: sth [[REG]], 0(4) +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded double to an unsigned i8 and store it to %b. +; Function Attrs: norecurse nounwind +define void @dpConv2ub(double* nocapture readonly %a, i8* nocapture %b) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptoui double %0 to i8 + store i8 %conv, i8* %b, align 1 + ret void + +; CHECK-LABEL: dpConv2ub +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsibx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2ub +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: stb [[REG]], 0(4) +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded float to an unsigned i64 and store it to %b. +; Function Attrs: norecurse nounwind +define void @spConv2udw(float* nocapture readonly %a, i64* nocapture %b) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptoui float %0 to i64 + store i64 %conv, i64* %b, align 8 + ret void + +; CHECK-LABEL: spConv2udw +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsd [[CONV]], 0(4) +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2udw +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stxsdx [[CONV]], 0, 4 +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded float to an unsigned i32 and store it to %b. +; Function Attrs: norecurse nounwind +define void @spConv2uw(float* nocapture readonly %a, i32* nocapture %b) { +entry: 
+ %0 = load float, float* %a, align 4 + %conv = fptoui float %0 to i32 + store i32 %conv, i32* %b, align 4 + ret void + +; CHECK-LABEL: spConv2uw +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stfiwx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2uw +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stfiwx [[CONV]], 0, 4 +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded float to an unsigned i16 and store it to %b. +; Function Attrs: norecurse nounwind +define void @spConv2uhw(float* nocapture readonly %a, i16* nocapture %b) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptoui float %0 to i16 + store i16 %conv, i16* %b, align 2 + ret void + +; CHECK-LABEL: spConv2uhw +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsihx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2uhw +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: sth [[REG]], 0(4) +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded float to an unsigned i8 and store it to %b. +; Function Attrs: norecurse nounwind +define void @spConv2ub(float* nocapture readonly %a, i8* nocapture %b) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptoui float %0 to i8 + store i8 %conv, i8* %b, align 1 + ret void + +; CHECK-LABEL: spConv2ub +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsibx [[CONV]], 0, 4 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2ub +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: stb [[REG]], 0(4) +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded double to an unsigned i64 and store it to %b[%idx]. +; Function Attrs: norecurse nounwind +define void @dpConv2udw_x(double* nocapture readonly %a, i64* nocapture %b, + i32 zeroext %idx) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptoui 
double %0 to i64 + %idxprom = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds i64, i64* %b, i64 %idxprom + store i64 %conv, i64* %arrayidx, align 8 + ret void + +; CHECK-LABEL: dpConv2udw_x +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3 +; CHECK-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2udw_x +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8: sldi [[REG:[0-9]+]], 5, 3 +; CHECK-PWR8-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stxsdx [[CONV]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded double to an unsigned i32 and store it to %b[%idx]. +; Function Attrs: norecurse nounwind +define void @dpConv2uw_x(double* nocapture readonly %a, i32* nocapture %b, + i32 zeroext %idx) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptoui double %0 to i32 + %idxprom = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom + store i32 %conv, i32* %arrayidx, align 4 + ret void + +; CHECK-LABEL: dpConv2uw_x +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2 +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2uw_x +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 2 +; CHECK-PWR8-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stfiwx [[CONV]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded double to an unsigned i16 and store it to %b[%idx]. +; Function Attrs: norecurse nounwind +define void @dpConv2uhw_x(double* nocapture readonly %a, i16* nocapture %b, + i32 zeroext %idx) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptoui double %0 to i16 + %idxprom = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds i16, i16* %b, i64 %idxprom + store i16 %conv, i16* %arrayidx, align 2 + ret void + +; CHECK-LABEL: dpConv2uhw_x +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 1 +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], 
[[LD]] +; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; REG captures the sldi result (the scaled index); REG2 captures the mfvsrwz result. +; CHECK-PWR8-LABEL: dpConv2uhw_x +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 1 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG2:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: sthx [[REG2]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded double to an unsigned i8 and store it to %b[%idx]. +; Function Attrs: norecurse nounwind +define void @dpConv2ub_x(double* nocapture readonly %a, i8* nocapture %b, + i32 zeroext %idx) { +entry: + %0 = load double, double* %a, align 8 + %conv = fptoui double %0 to i8 + %idxprom = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds i8, i8* %b, i64 %idxprom + store i8 %conv, i8* %arrayidx, align 1 + ret void + +; CHECK-LABEL: dpConv2ub_x +; CHECK: lfd [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsibx [[CONV]], 4, 5 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: dpConv2ub_x +; CHECK-PWR8: lxsdx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: stbx [[REG]], 4, 5 +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded float to an unsigned i64 and store it to %b[%idx]. +; Function Attrs: norecurse nounwind +define void @spConv2udw_x(float* nocapture readonly %a, i64* nocapture %b, + i32 zeroext %idx) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptoui float %0 to i64 + %idxprom = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds i64, i64* %b, i64 %idxprom + store i64 %conv, i64* %arrayidx, align 8 + ret void + +; CHECK-LABEL: spConv2udw_x +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3 +; CHECK-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2udw_x +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 3 +; CHECK-PWR8-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stxsdx [[CONV]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded float to an unsigned i32 and store it to %b[%idx]. +; 
Function Attrs: norecurse nounwind +define void @spConv2uw_x(float* nocapture readonly %a, i32* nocapture %b, + i32 zeroext %idx) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptoui float %0 to i32 + %idxprom = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom + store i32 %conv, i32* %arrayidx, align 4 + ret void + +; CHECK-LABEL: spConv2uw_x +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2 +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2uw_x +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 2 +; CHECK-PWR8-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: stfiwx [[CONV]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded float to an unsigned i16 and store it to %b[%idx]. +; Function Attrs: norecurse nounwind +define void @spConv2uhw_x(float* nocapture readonly %a, i16* nocapture %b, + i32 zeroext %idx) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptoui float %0 to i16 + %idxprom = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds i16, i16* %b, i64 %idxprom + store i16 %conv, i16* %arrayidx, align 2 + ret void + +; CHECK-LABEL: spConv2uhw_x +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK: sldi [[REG:[0-9]+]], 5, 1 +; CHECK: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]] +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2uhw_x +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 1 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG2:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: sthx [[REG2]], 4, [[REG]] +; CHECK-PWR8-NEXT: blr +} + +; Convert a loaded float to an unsigned i8 and store it to %b[%idx]. +; Function Attrs: norecurse nounwind +define void @spConv2ub_x(float* nocapture readonly %a, i8* nocapture %b, + i32 zeroext %idx) { +entry: + %0 = load float, float* %a, align 4 + %conv = fptoui float %0 to i8 + %idxprom = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds 
i8, i8* %b, i64 %idxprom + store i8 %conv, i8* %arrayidx, align 1 + ret void + +; CHECK-LABEL: spConv2ub_x +; CHECK: lfs [[LD:[0-9]+]], 0(3) +; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]] +; CHECK-NEXT: stxsibx [[CONV]], 4, 5 +; CHECK-NEXT: blr + +; CHECK-PWR8-LABEL: spConv2ub_x +; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] +; CHECK-PWR8-NEXT: mfvsrwz [[REG:[0-9]+]], [[CONV]] +; CHECK-PWR8-NEXT: stbx [[REG]], 4, 5 +; CHECK-PWR8-NEXT: blr +}