Index: lib/Target/PowerPC/PPC.td
===================================================================
--- lib/Target/PowerPC/PPC.td
+++ lib/Target/PowerPC/PPC.td
@@ -122,6 +122,10 @@
 def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
                                        "Enable POWER8 vector instructions",
                                        [FeatureVSX, FeatureP8Altivec]>;
+def FeatureDirectMove :
+  SubtargetFeature<"direct-move", "HasDirectMove", "true",
+                   "Enable Power8 direct move instructions",
+                   [FeatureVSX]>;
 def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
                                              "HasPartwordAtomics", "true",
                                              "Enable l[bh]arx and st[bh]cx.">;
@@ -164,7 +168,7 @@
        DeprecatedMFTB, DeprecatedDST];
   list<SubtargetFeature> Power8SpecificFeatures =
       [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
-       FeatureHTM, FeatureICBT];
+       FeatureHTM, FeatureDirectMove, FeatureICBT];
   list<SubtargetFeature> Power8FeatureList =
       !listconcat(Power7FeatureList, Power8SpecificFeatures);
 }
Index: lib/Target/PowerPC/PPCFastISel.cpp
===================================================================
--- lib/Target/PowerPC/PPCFastISel.cpp
+++ lib/Target/PowerPC/PPCFastISel.cpp
@@ -958,6 +958,8 @@
 }
 
 // Attempt to fast-select an integer-to-floating-point conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
 bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
   MVT DstVT;
   Type *DstTy = I->getType();
@@ -1065,6 +1067,8 @@
 }
 
 // Attempt to fast-select a floating-point-to-integer conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
 bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
   MVT DstVT, SrcVT;
   Type *DstTy = I->getType();
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -119,6 +119,15 @@
       /// resultant GPR.
Bits corresponding to other CR regs are undefined.
       MFOCRF,
 
+      /// Direct move from a VSX register to a GPR
+      MFVSR,
+
+      /// Direct move from a GPR to a VSX register (algebraic: sign-extends words)
+      MTVSRA,
+
+      /// Direct move from a GPR to a VSX register (zero: zero-extends words)
+      MTVSRZ,
+
       // FIXME: Remove these once the ANDI glue bug is fixed:
       /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
       /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@@ -645,6 +654,10 @@
 
     void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                 SelectionDAG &DAG, SDLoc dl) const;
+    SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
+                                     SDLoc dl) const;
+    SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
+                                     SDLoc dl) const;
 
     SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
     SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -996,6 +996,9 @@
   case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
   case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
+  case PPCISD::MFVSR: return "PPCISD::MFVSR";
+  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
+  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
   case PPCISD::VCMP: return "PPCISD::VCMP";
   case PPCISD::VCMPo: return "PPCISD::VCMPo";
   case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -5911,8 +5914,46 @@
   RLI.MPI = MPI;
 }
 
+/// \brief Custom lowers floating point to integer conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
+                                                    SelectionDAG &DAG,
+                                                    SDLoc dl) const {
+  SDValue Value = Op.getOperand(0);
+  assert(Value.getValueType().isFloatingPoint());
+  // The conversions below are all done on f64, so widen an f32 source first.
+  if (Value.getValueType() == MVT::f32)
+    Value = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Value);
+
+  const bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
+  const MVT ResVT = Op.getSimpleValueType();
+  unsigned ConvOpc;
+  switch (ResVT.SimpleTy) {
+  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+  case MVT::i32:
+    // Unsigned without FPCVT falls back to FCTIDZ, the signed i64 convert.
+    if (IsSigned)
+      ConvOpc = PPCISD::FCTIWZ;
+    else
+      ConvOpc = Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ;
+    break;
+  case MVT::i64:
+    assert((IsSigned || Subtarget.hasFPCVT()) &&
+           "i64 FP_TO_UINT is supported only with FPCVT");
+    ConvOpc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
+    break;
+  }
+  // Convert to an integer held in an f64, then direct-move it to a GPR.
+  SDValue Conv = DAG.getNode(ConvOpc, dl, MVT::f64, Value);
+  return DAG.getNode(PPCISD::MFVSR, dl, ResVT, Conv);
+}
+
 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                           SDLoc dl) const {
+  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
+    return LowerFP_TO_INTDirectMove(Op, DAG, dl);
+
   ReuseLoadInfo RLI;
   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
 
@@ -5990,6 +6031,40 @@
   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
 }
 
+/// \brief Custom lowers integer to floating point conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
+                                                    SelectionDAG &DAG,
+                                                    SDLoc dl) const {
+  // Two nodes are built: a GPR-to-VSR direct move, then one FCFID* convert.
+  // (LowerINT_TO_FP only takes this path on PPC64 with direct moves + FPCVT.)
+  assert((Op.getValueType() == MVT::f32 ||
+          Op.getValueType() == MVT::f64) &&
+         "Invalid floating point type as target of conversion");
+  assert(Subtarget.hasFPCVT() &&
+         "Int to FP conversions with direct moves require FPCVT");
+
+  SDValue Src = Op.getOperand(0);
+  const bool SinglePrec = Op.getValueType() == MVT::f32;
+  const bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
+  const bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
+
+  // Move the integer into a VSX register.  Only an unsigned i32 uses the
+  // zero form (MTVSRZ); a signed i32 and every i64 source, signed or not,
+  // use the algebraic form (MTVSRA).
+  const unsigned MoveOp =
+      (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
+  SDValue FP = DAG.getNode(MoveOp, dl, MVT::f64, Src);
+
+  // To prevent unnecessary double rounding, convert directly to single
+  // precision if the target value is single precision.
+  const unsigned ConvOp =
+      Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID)
+             : (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
+  return DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+}
+
 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                           SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -6025,6 +6100,11 @@
                        DAG.getConstantFP(1.0, Op.getValueType()),
                        DAG.getConstantFP(0.0, Op.getValueType()));
 
+  // If we have direct moves, we can do all the conversion, skip the store/load
+  // However, without FPCVT we can't do most conversions
+  if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+    return LowerINT_TO_FPDirectMove(Op, DAG, dl);
+
   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
          "UINT_TO_FP is supported only with FPCVT");
 
Index: lib/Target/PowerPC/PPCInstrFormats.td
===================================================================
--- lib/Target/PowerPC/PPCInstrFormats.td
+++ lib/Target/PowerPC/PPCInstrFormats.td
@@ -764,6 +764,12 @@
   let Inst{31} = XT{5};
 }
 
+class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                     string asmstr, InstrItinClass itin, list<dag> pattern>
+  : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let B = 0;
+}
+
 class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
   : I<opcode, OOL, IOL, asmstr, itin> {
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -41,6 +41,9 @@
 def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
                         [SDNPHasChain, SDNPMayStore]>;
 def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
+def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
+def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
+def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
 
 multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
                     string asmbase, string asmstr, InstrItinClass itin,
@@ -946,6 +949,7 @@
   when the elements are larger than i32.
*/
 def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
 let Predicates = [HasP8Vector] in {
 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
 let isCommutable = 1 in {
@@ -965,3 +969,24 @@
                      [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
 } // AddedComplexity = 500
 } // HasP8Vector
+
+let Predicates = [HasDirectMove, HasVSX] in {
+// VSX direct moves between GPRs and VSRs (select PPCmfvsr/PPCmtvsra/PPCmtvsrz)
+def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
+                            "mfvsrd $rA, $XT", IIC_VecGeneral,
+                            [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+             Requires<[In64BitMode]>;
+def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
+                             "mfvsrwz $rA, $XT", IIC_VecGeneral,
+                             [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
+                            "mtvsrd $XT, $rA", IIC_VecGeneral,
+                            [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+             Requires<[In64BitMode]>;
+def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
+                             "mtvsrwa $XT, $rA", IIC_VecGeneral,
+                             [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
+                             "mtvsrwz $XT, $rA", IIC_VecGeneral,
+                             [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+} // HasDirectMove, HasVSX
Index: lib/Target/PowerPC/PPCSubtarget.h
===================================================================
--- lib/Target/PowerPC/PPCSubtarget.h
+++ lib/Target/PowerPC/PPCSubtarget.h
@@ -117,6 +117,7 @@
   bool HasICBT;
   bool HasInvariantFunctionDescriptors;
   bool HasPartwordAtomics;
+  bool HasDirectMove;
   bool HasHTM;
 
   /// When targeting QPX running a stock PPC64 Linux kernel where the stack
@@ -243,6 +244,7 @@
     return HasInvariantFunctionDescriptors;
   }
   bool hasPartwordAtomics() const { return HasPartwordAtomics; }
+  bool hasDirectMove() const { return HasDirectMove; }
 
   bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
   unsigned getPlatformStackAlignment() const {
Index: 
lib/Target/PowerPC/PPCSubtarget.cpp =================================================================== --- lib/Target/PowerPC/PPCSubtarget.cpp +++ lib/Target/PowerPC/PPCSubtarget.cpp @@ -97,6 +97,7 @@ HasICBT = false; HasInvariantFunctionDescriptors = false; HasPartwordAtomics = false; + HasDirectMove = false; IsQPXStackUnaligned = false; HasHTM = false; } Index: test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll =================================================================== --- test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll +++ test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll @@ -0,0 +1,426 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s + +; Function Attrs: nounwind +define zeroext i8 @_Z6testcff(float %arg) { +entry: + %arg.addr = alloca float, align 4 + store float %arg, float* %arg.addr, align 4 + %0 = load float, float* %arg.addr, align 4 + %conv = fptoui float %0 to i8 + ret i8 %conv +; CHECK-LABEL: @_Z6testcff +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define float @_Z6testfcc(i8 zeroext %arg) { +entry: + %arg.addr = alloca i8, align 1 + store i8 %arg, i8* %arg.addr, align 1 + %0 = load i8, i8* %arg.addr, align 1 + %conv = uitofp i8 %0 to float + ret float %conv +; CHECK-LABEL: @_Z6testfcc +; CHECK: mtvsrwz {{[0-9]+}}, 3 +; FIXME: Once we have XSCVUXDSP implemented, this will change +; CHECK: fcfidus 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define zeroext i8 @_Z6testcdd(double %arg) { +entry: + %arg.addr = alloca double, align 8 + store double %arg, double* %arg.addr, align 8 + %0 = load double, double* %arg.addr, align 8 + %conv = fptoui double %0 to i8 + ret i8 %conv +; CHECK-LABEL: @_Z6testcdd +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define double @_Z6testdcc(i8 zeroext %arg) { +entry: + 
%arg.addr = alloca i8, align 1 + store i8 %arg, i8* %arg.addr, align 1 + %0 = load i8, i8* %arg.addr, align 1 + %conv = uitofp i8 %0 to double + ret double %conv +; CHECK-LABEL: @_Z6testdcc +; CHECK: mtvsrwz {{[0-9]+}}, 3 +; CHECK: xscvuxddp 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define zeroext i8 @_Z7testucff(float %arg) { +entry: + %arg.addr = alloca float, align 4 + store float %arg, float* %arg.addr, align 4 + %0 = load float, float* %arg.addr, align 4 + %conv = fptoui float %0 to i8 + ret i8 %conv +; CHECK-LABEL: @_Z7testucff +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define float @_Z7testfuch(i8 zeroext %arg) { +entry: + %arg.addr = alloca i8, align 1 + store i8 %arg, i8* %arg.addr, align 1 + %0 = load i8, i8* %arg.addr, align 1 + %conv = uitofp i8 %0 to float + ret float %conv +; CHECK-LABEL: @_Z7testfuch +; CHECK: mtvsrwz {{[0-9]+}}, 3 +; FIXME: Once we have XSCVUXDSP implemented, this will change +; CHECK: fcfidus 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define zeroext i8 @_Z7testucdd(double %arg) { +entry: + %arg.addr = alloca double, align 8 + store double %arg, double* %arg.addr, align 8 + %0 = load double, double* %arg.addr, align 8 + %conv = fptoui double %0 to i8 + ret i8 %conv +; CHECK-LABEL: @_Z7testucdd +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define double @_Z7testduch(i8 zeroext %arg) { +entry: + %arg.addr = alloca i8, align 1 + store i8 %arg, i8* %arg.addr, align 1 + %0 = load i8, i8* %arg.addr, align 1 + %conv = uitofp i8 %0 to double + ret double %conv +; CHECK-LABEL: @_Z7testduch +; CHECK: mtvsrwz {{[0-9]+}}, 3 +; CHECK: xscvuxddp 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define signext i16 @_Z6testsff(float %arg) { +entry: + %arg.addr = alloca float, align 4 + store float %arg, float* %arg.addr, align 4 + %0 = load float, float* %arg.addr, align 4 + %conv = fptosi float %0 to i16 + ret i16 %conv +; 
CHECK-LABEL: @_Z6testsff +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define float @_Z6testfss(i16 signext %arg) { +entry: + %arg.addr = alloca i16, align 2 + store i16 %arg, i16* %arg.addr, align 2 + %0 = load i16, i16* %arg.addr, align 2 + %conv = sitofp i16 %0 to float + ret float %conv +; CHECK-LABEL: @_Z6testfss +; CHECK: mtvsrwa {{[0-9]+}}, 3 +; FIXME: Once we have XSCVSXDSP implemented, this will change +; CHECK: fcfids 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define signext i16 @_Z6testsdd(double %arg) { +entry: + %arg.addr = alloca double, align 8 + store double %arg, double* %arg.addr, align 8 + %0 = load double, double* %arg.addr, align 8 + %conv = fptosi double %0 to i16 + ret i16 %conv +; CHECK-LABEL: @_Z6testsdd +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define double @_Z6testdss(i16 signext %arg) { +entry: + %arg.addr = alloca i16, align 2 + store i16 %arg, i16* %arg.addr, align 2 + %0 = load i16, i16* %arg.addr, align 2 + %conv = sitofp i16 %0 to double + ret double %conv +; CHECK-LABEL: @_Z6testdss +; CHECK: mtvsrwa {{[0-9]+}}, 3 +; CHECK: xscvsxddp 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define zeroext i16 @_Z7testusff(float %arg) { +entry: + %arg.addr = alloca float, align 4 + store float %arg, float* %arg.addr, align 4 + %0 = load float, float* %arg.addr, align 4 + %conv = fptoui float %0 to i16 + ret i16 %conv +; CHECK-LABEL: @_Z7testusff +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define float @_Z7testfust(i16 zeroext %arg) { +entry: + %arg.addr = alloca i16, align 2 + store i16 %arg, i16* %arg.addr, align 2 + %0 = load i16, i16* %arg.addr, align 2 + %conv = uitofp i16 %0 to float + ret float %conv +; CHECK-LABEL: @_Z7testfust +; CHECK: mtvsrwz {{[0-9]+}}, 3 +; FIXME: Once we have XSCVUXDSP implemented, this will change +; CHECK: fcfidus 1, {{[0-9]+}} +} + +; 
Function Attrs: nounwind +define zeroext i16 @_Z7testusdd(double %arg) { +entry: + %arg.addr = alloca double, align 8 + store double %arg, double* %arg.addr, align 8 + %0 = load double, double* %arg.addr, align 8 + %conv = fptoui double %0 to i16 + ret i16 %conv +; CHECK-LABEL: @_Z7testusdd +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define double @_Z7testdust(i16 zeroext %arg) { +entry: + %arg.addr = alloca i16, align 2 + store i16 %arg, i16* %arg.addr, align 2 + %0 = load i16, i16* %arg.addr, align 2 + %conv = uitofp i16 %0 to double + ret double %conv +; CHECK-LABEL: @_Z7testdust +; CHECK: mtvsrwz {{[0-9]+}}, 3 +; CHECK: xscvuxddp 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define signext i32 @_Z6testiff(float %arg) { +entry: + %arg.addr = alloca float, align 4 + store float %arg, float* %arg.addr, align 4 + %0 = load float, float* %arg.addr, align 4 + %conv = fptosi float %0 to i32 + ret i32 %conv +; CHECK-LABEL: @_Z6testiff +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define float @_Z6testfii(i32 signext %arg) { +entry: + %arg.addr = alloca i32, align 4 + store i32 %arg, i32* %arg.addr, align 4 + %0 = load i32, i32* %arg.addr, align 4 + %conv = sitofp i32 %0 to float + ret float %conv +; CHECK-LABEL: @_Z6testfii +; CHECK: mtvsrwa {{[0-9]+}}, 3 +; FIXME: Once we have XSCVSXDSP implemented, this will change +; CHECK: fcfids 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define signext i32 @_Z6testidd(double %arg) { +entry: + %arg.addr = alloca double, align 8 + store double %arg, double* %arg.addr, align 8 + %0 = load double, double* %arg.addr, align 8 + %conv = fptosi double %0 to i32 + ret i32 %conv +; CHECK-LABEL: @_Z6testidd +; CHECK: xscvdpsxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define double @_Z6testdii(i32 signext %arg) { +entry: + %arg.addr = alloca i32, align 4 + store i32 %arg, i32* 
%arg.addr, align 4 + %0 = load i32, i32* %arg.addr, align 4 + %conv = sitofp i32 %0 to double + ret double %conv +; CHECK-LABEL: @_Z6testdii +; CHECK: mtvsrwa {{[0-9]+}}, 3 +; CHECK: xscvsxddp 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define zeroext i32 @_Z7testuiff(float %arg) { +entry: + %arg.addr = alloca float, align 4 + store float %arg, float* %arg.addr, align 4 + %0 = load float, float* %arg.addr, align 4 + %conv = fptoui float %0 to i32 + ret i32 %conv +; CHECK-LABEL: @_Z7testuiff +; CHECK: xscvdpuxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define float @_Z7testfuij(i32 zeroext %arg) { +entry: + %arg.addr = alloca i32, align 4 + store i32 %arg, i32* %arg.addr, align 4 + %0 = load i32, i32* %arg.addr, align 4 + %conv = uitofp i32 %0 to float + ret float %conv +; CHECK-LABEL: @_Z7testfuij +; CHECK: mtvsrwz {{[0-9]+}}, 3 +; FIXME: Once we have XSCVUXDSP implemented, this will change +; CHECK: fcfidus 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define zeroext i32 @_Z7testuidd(double %arg) { +entry: + %arg.addr = alloca double, align 8 + store double %arg, double* %arg.addr, align 8 + %0 = load double, double* %arg.addr, align 8 + %conv = fptoui double %0 to i32 + ret i32 %conv +; CHECK-LABEL: @_Z7testuidd +; CHECK: xscvdpuxws {{[0-9]+}}, 1 +; CHECK: mfvsrwz 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define double @_Z7testduij(i32 zeroext %arg) { +entry: + %arg.addr = alloca i32, align 4 + store i32 %arg, i32* %arg.addr, align 4 + %0 = load i32, i32* %arg.addr, align 4 + %conv = uitofp i32 %0 to double + ret double %conv +; CHECK-LABEL: @_Z7testduij +; CHECK: mtvsrwz {{[0-9]+}}, 3 +; CHECK: xscvuxddp 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define i64 @_Z7testllff(float %arg) { +entry: + %arg.addr = alloca float, align 4 + store float %arg, float* %arg.addr, align 4 + %0 = load float, float* %arg.addr, align 4 + %conv = fptosi float %0 to i64 + ret i64 %conv +; CHECK-LABEL: @_Z7testllff +; CHECK: 
xscvdpsxds {{[0-9]+}}, 1 +; CHECK: mfvsrd 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define float @_Z7testfllx(i64 %arg) { +entry: + %arg.addr = alloca i64, align 8 + store i64 %arg, i64* %arg.addr, align 8 + %0 = load i64, i64* %arg.addr, align 8 + %conv = sitofp i64 %0 to float + ret float %conv +; CHECK-LABEL:@_Z7testfllx +; CHECK: mtvsrd {{[0-9]+}}, 3 +; FIXME: Once we have XSCVSXDSP implemented, this will change +; CHECK: fcfids 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define i64 @_Z7testlldd(double %arg) { +entry: + %arg.addr = alloca double, align 8 + store double %arg, double* %arg.addr, align 8 + %0 = load double, double* %arg.addr, align 8 + %conv = fptosi double %0 to i64 + ret i64 %conv +; CHECK-LABEL: @_Z7testlldd +; CHECK: xscvdpsxds {{[0-9]+}}, 1 +; CHECK: mfvsrd 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define double @_Z7testdllx(i64 %arg) { +entry: + %arg.addr = alloca i64, align 8 + store i64 %arg, i64* %arg.addr, align 8 + %0 = load i64, i64* %arg.addr, align 8 + %conv = sitofp i64 %0 to double + ret double %conv +; CHECK-LABEL: @_Z7testdllx +; CHECK: mtvsrd {{[0-9]+}}, 3 +; CHECK: xscvsxddp 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define i64 @_Z8testullff(float %arg) { +entry: + %arg.addr = alloca float, align 4 + store float %arg, float* %arg.addr, align 4 + %0 = load float, float* %arg.addr, align 4 + %conv = fptoui float %0 to i64 + ret i64 %conv +; CHECK-LABEL: @_Z8testullff +; CHECK: xscvdpuxds {{[0-9]+}}, 1 +; CHECK: mfvsrd 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define float @_Z8testfully(i64 %arg) { +entry: + %arg.addr = alloca i64, align 8 + store i64 %arg, i64* %arg.addr, align 8 + %0 = load i64, i64* %arg.addr, align 8 + %conv = uitofp i64 %0 to float + ret float %conv +; CHECK-LABEL: @_Z8testfully +; CHECK: mtvsrd {{[0-9]+}}, 3 +; FIXME: Once we have XSCVUXDSP implemented, this will change +; CHECK: fcfidus 1, {{[0-9]+}} +} + +; Function Attrs: nounwind +define i64 @_Z8testulldd(double %arg) { +entry: 
+ %arg.addr = alloca double, align 8 + store double %arg, double* %arg.addr, align 8 + %0 = load double, double* %arg.addr, align 8 + %conv = fptoui double %0 to i64 + ret i64 %conv +; CHECK-LABEL: @_Z8testulldd +; CHECK: xscvdpuxds {{[0-9]+}}, 1 +; CHECK: mfvsrd 3, {{[0-9]+}} +} + +; Function Attrs: nounwind +define double @_Z8testdully(i64 %arg) { +entry: + %arg.addr = alloca i64, align 8 + store i64 %arg, i64* %arg.addr, align 8 + %0 = load i64, i64* %arg.addr, align 8 + %conv = uitofp i64 %0 to double + ret double %conv +; CHECK-LABEL: @_Z8testdully +; CHECK: mtvsrd {{[0-9]+}}, 3 +; CHECK: xscvuxddp 1, {{[0-9]+}} +} Index: test/MC/Disassembler/PowerPC/vsx.txt =================================================================== --- test/MC/Disassembler/PowerPC/vsx.txt +++ test/MC/Disassembler/PowerPC/vsx.txt @@ -459,3 +459,17 @@ # CHECK: xxpermdi 7, 63, 63, 2 0xf0 0xff 0xfa 0x56 +# CHECK: mfvsrd 3, 0 +0x7c 0x03 0x00 0x66 + +# CHECK: mfvsrwz 5, 0 +0x7c 0x05 0x00 0xe6 + +# CHECK: mtvsrd 0, 3 +0x7c 0x03 0x01 0x66 + +# CHECK: mtvsrwa 0, 3 +0x7c 0x03 0x01 0xa6 + +# CHECK: mtvsrwz 0, 3 +0x7c 0x03 0x01 0xe6 Index: test/MC/PowerPC/vsx.s =================================================================== --- test/MC/PowerPC/vsx.s +++ test/MC/PowerPC/vsx.s @@ -454,3 +454,20 @@ # CHECK-BE: xxpermdi 7, 63, 63, 2 # encoding: [0xf0,0xff,0xfa,0x56] # CHECK-LE: xxpermdi 7, 63, 63, 2 # encoding: [0x56,0xfa,0xff,0xf0] xxswapd 7, 63 + +# Move to/from VSR +# CHECK-BE: mfvsrd 3, 0 # encoding: [0x7c,0x03,0x00,0x66] +# CHECK-LE: mfvsrd 3, 0 # encoding: [0x66,0x00,0x03,0x7c] + mfvsrd 3, 0 +# CHECK-BE: mfvsrwz 5, 0 # encoding: [0x7c,0x05,0x00,0xe6] +# CHECK-LE: mfvsrwz 5, 0 # encoding: [0xe6,0x00,0x05,0x7c] + mfvsrwz 5, 0 +# CHECK-BE: mtvsrd 0, 3 # encoding: [0x7c,0x03,0x01,0x66] +# CHECK-LE: mtvsrd 0, 3 # encoding: [0x66,0x01,0x03,0x7c] + mtvsrd 0, 3 +# CHECK-BE: mtvsrwa 0, 3 # encoding: [0x7c,0x03,0x01,0xa6] +# CHECK-LE: mtvsrwa 0, 3 # encoding: [0xa6,0x01,0x03,0x7c] + mtvsrwa 0, 3 +# 
CHECK-BE: mtvsrwz 0, 3 # encoding: [0x7c,0x03,0x01,0xe6] +# CHECK-LE: mtvsrwz 0, 3 # encoding: [0xe6,0x01,0x03,0x7c] + mtvsrwz 0, 3