Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -275,6 +275,7 @@ [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; +// out = 1.0 / sqrt(a) result clamped to +/- max_float. def int_amdgcn_rsq_clamp : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -416,7 +416,6 @@ RCP, RSQ, RCP_LEGACY, - RSQ_LEGACY, RCP_IFLAG, FMUL_LEGACY, RSQ_CLAMP, Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3000,6 +3000,16 @@ case Intrinsic::amdgcn_mul_i24: case Intrinsic::amdgcn_mul_u24: return simplifyI24(N, DCI); + case Intrinsic::amdgcn_fract: + case Intrinsic::amdgcn_rsq: + case Intrinsic::amdgcn_rcp_legacy: + case Intrinsic::amdgcn_rsq_legacy: + case Intrinsic::amdgcn_rsq_clamp: + case Intrinsic::amdgcn_ldexp: { + // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted + SDValue Src = N->getOperand(1); + return Src.isUndef() ? Src : SDValue(); + } default: return SDValue(); } @@ -4258,7 +4268,6 @@ NODE_NAME_CASE(RCP) NODE_NAME_CASE(RSQ) NODE_NAME_CASE(RCP_LEGACY) - NODE_NAME_CASE(RSQ_LEGACY) NODE_NAME_CASE(RCP_IFLAG) NODE_NAME_CASE(FMUL_LEGACY) NODE_NAME_CASE(RSQ_CLAMP) @@ -4653,7 +4662,6 @@ case AMDGPUISD::RCP: case AMDGPUISD::RSQ: case AMDGPUISD::RCP_LEGACY: - case AMDGPUISD::RSQ_LEGACY: case AMDGPUISD::RSQ_CLAMP: { if (SNaN) return true; @@ -4697,6 +4705,17 @@ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); } + case Intrinsic::amdgcn_rcp: + case Intrinsic::amdgcn_rsq: + case Intrinsic::amdgcn_rcp_legacy: + case Intrinsic::amdgcn_rsq_legacy: + case Intrinsic::amdgcn_rsq_clamp: { + if (SNaN) + return true; + + // TODO: Need is known positive check. + return false; + } case Intrinsic::amdgcn_fdot2: // TODO: Refine on operand return SNaN; Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -121,8 +121,6 @@ // out = 1.0 / sqrt(a) def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; -// out = 1.0 / sqrt(a) -def AMDGPUrsq_legacy_impl : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>; def AMDGPUrcp_legacy_impl : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>; def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>; @@ -385,9 +383,6 @@ def AMDGPUrcp_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rcp_legacy node:$src), (AMDGPUrcp_legacy_impl node:$src)]>; -def AMDGPUrsq_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rsq_legacy node:$src), - (AMDGPUrsq_legacy_impl node:$src)]>; - def AMDGPUrsq : PatFrags<(ops node:$src), [(int_amdgcn_rsq node:$src), (AMDGPUrsq_impl node:$src)]>; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5842,8 +5842,7 @@ case Intrinsic::amdgcn_rsq_legacy: if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) return emitRemovedIntrinsicError(DAG, DL, VT); - - return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); + return SDValue(); case Intrinsic::amdgcn_rcp_legacy: if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) return emitRemovedIntrinsicError(DAG, DL, VT); @@ -8777,7 +8776,6 @@ case AMDGPUISD::RSQ: case AMDGPUISD::RSQ_CLAMP: case AMDGPUISD::RCP_LEGACY: - case AMDGPUISD::RSQ_LEGACY: case AMDGPUISD::RCP_IFLAG: case AMDGPUISD::TRIG_PREOP: case AMDGPUISD::DIV_SCALE: @@ -8882,6 +8880,11 @@ case Intrinsic::amdgcn_cubeid: case Intrinsic::amdgcn_frexp_mant: case Intrinsic::amdgcn_fdot2: + case Intrinsic::amdgcn_rcp: + case Intrinsic::amdgcn_rsq: + case Intrinsic::amdgcn_rsq_clamp: + case Intrinsic::amdgcn_rcp_legacy: + case Intrinsic::amdgcn_rsq_legacy: return true; default: break; @@ -10068,10 +10071,10 @@ case AMDGPUISD::FRACT: case AMDGPUISD::RSQ: case AMDGPUISD::RCP_LEGACY: - case AMDGPUISD::RSQ_LEGACY: case AMDGPUISD::RCP_IFLAG: case AMDGPUISD::RSQ_CLAMP: case AMDGPUISD::LDEXP: { + // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted SDValue Src = N->getOperand(0); if (Src.isUndef()) return Src; Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -325,7 +325,7 @@ defm V_RSQ_CLAMP_F32 : VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>; defm V_RSQ_LEGACY_F32 : - VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>; + VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>; } // End SchedRW = [WriteQuarterRate32] let SchedRW = [WriteDouble] in {