Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -46,7 +46,6 @@ SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -707,7 +707,6 @@ case ISD::FRINT: return LowerFRINT(Op, DAG); case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG); case ISD::FROUND: return LowerFROUND(Op, DAG); - case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); @@ -1679,31 +1678,6 @@ llvm_unreachable("unhandled type"); } -SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { - SDLoc SL(Op); - SDValue Src = Op.getOperand(0); - - // result = trunc(src); - // if (src < 0.0 && src != result) - // result += -1.0. - - SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); - - const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); - const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64); - - EVT SetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); - - SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); - SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); - SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); - - SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); - // TODO: Should this propagate fast-math-flags? - return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); -} - SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -41,7 +41,6 @@ SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -3539,7 +3539,7 @@ SRCMODS.NONE, (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE), SRCMODS.NONE, - (V_MOV_B64_PSEUDO 0x3fefffffffffffff), + CONST.FP64_ONE, DSTCLAMP.NONE, DSTOMOD.NONE), $x, (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)), Index: test/CodeGen/AMDGPU/ffloor.f64.ll =================================================================== --- test/CodeGen/AMDGPU/ffloor.f64.ll +++ test/CodeGen/AMDGPU/ffloor.f64.ll @@ -12,12 +12,13 @@ ; FUNC-LABEL: {{^}}ffloor_f64: ; CI: v_floor_f64_e32 -; SI: v_fract_f64_e32 -; SI-DAG: v_min_f64 -; SI-DAG: v_cmp_class_f64_e64 + +; SI: v_fract_f64_e32 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X:s\[[0-9]+:[0-9]+\]]] +; SI-DAG: v_min_f64 [[MIN:v\[[0-9]+:[0-9]+\]]], 1.0, [[FRACT]] +; SI-DAG: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, [[X]], 3 ; SI: v_cndmask_b32_e64 ; SI: v_cndmask_b32_e64 -; SI: v_add_f64 +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, [[X]], -{{v\[[0-9]+:[0-9]+\]}} ; SI: s_endpgm define void @ffloor_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.floor.f64(double %x) nounwind readnone @@ -27,15 +28,16 @@ ; FUNC-LABEL: {{^}}ffloor_f64_neg: ; CI: v_floor_f64_e64 + ; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT:s[[0-9]+:[0-9]+]]] ; SI-DAG: v_min_f64 ; SI-DAG: v_cmp_class_f64_e64 ; SI: v_cndmask_b32_e64 ; SI: v_cndmask_b32_e64 -; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]] +; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]], -v{{\[[0-9]+:[0-9]+\]}} ; SI: s_endpgm define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) { - %neg = fsub double 0.0, %x + %neg = fsub double -0.0, %x %y = call double @llvm.floor.f64(double %neg) nounwind readnone store double %y, double addrspace(1)* %out ret void @@ -52,7 +54,7 @@ ; SI: s_endpgm define void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) { %abs = call double @llvm.fabs.f64(double %x) - %neg = fsub double 0.0, %abs + %neg = fsub double -0.0, %abs %y = call double @llvm.floor.f64(double %neg) nounwind readnone store double %y, double addrspace(1)* %out ret void Index: test/CodeGen/AMDGPU/fract.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fract.f64.ll +++ test/CodeGen/AMDGPU/fract.f64.ll @@ -10,9 +10,7 @@ ; FUNC-LABEL: {{^}}fract_f64: ; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]] -; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1 -; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff -; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] +; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], 1.0, [[FRC]] ; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3 ; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]] ; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]] @@ -37,9 +35,7 @@ ; FUNC-LABEL: {{^}}fract_f64_neg: ; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]] -; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1 -; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff -; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] +; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], 1.0, [[FRC]] ; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3 ; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]] ; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]] @@ -65,9 +61,7 @@ ; FUNC-LABEL: {{^}}fract_f64_neg_abs: ; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]| -; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1 -; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff -; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] +; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], 1.0, [[FRC]] ; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3 ; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]] ; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]