Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -116,6 +116,7 @@
 
 protected:
   bool isUnsafeMath(const FPMathOperator *FPOp) const;
+  bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;
 
   bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const;
 
@@ -407,6 +408,11 @@
   return UnsafeFPMath || FPOp->isFast();
 }
 
+bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const {
+  return UnsafeFPMath ||
+         (FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs());
+}
+
 bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold(
     const FPMathOperator *FPOp) const {
   // TODO: Refine to approxFunc or contract
@@ -798,10 +804,6 @@
     CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
   }
 
-  // No unsafe math , no constant argument, do nothing
-  if (!isUnsafeMath(FPOp) && !CF && !CINT && !CZero)
-    return false;
-
   // 0x1111111 means that we don't do anything for this call.
   int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
 
@@ -857,7 +859,7 @@
     }
   }
 
-  if (!isUnsafeMath(FPOp))
+  if (!isUnsafeFiniteOnlyMath(FPOp))
     return false;
 
   // Unsafe Math optimization
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -590,8 +590,18 @@
 ; CHECK-LABEL: define float @test_pown_afn_nnan_ninf_f32
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn float @_Z4pownfi(float [[X]], i32 [[Y]])
-; CHECK-NEXT:    ret float [[CALL]]
+; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i32 [[Y]], 31
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
+; CHECK-NEXT:    ret float [[TMP3]]
 ;
 entry:
   %call = tail call nnan ninf afn float @_Z4pownfi(float %x, i32 %y)
@@ -602,8 +612,19 @@
 ; CHECK-LABEL: define <2 x float> @test_pown_afn_nnan_ninf_v2f32
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[Y]])
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
+; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[__FABS]])
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])
+; CHECK-NEXT:    [[__YTOU:%.*]] = fptosi <2 x float> [[POWNI2F]] to <2 x i32>
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+; CHECK-NEXT:    ret <2 x float> [[TMP3]]
 ;
 entry:
   %call = tail call nnan ninf afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> %y)
@@ -614,8 +635,19 @@
 ; CHECK-LABEL: define double @test_pown_afn_nnan_ninf_f64
 ; CHECK-SAME: (double [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn double @_Z4powndi(double [[X]], i32 [[Y]])
-; CHECK-NEXT:    ret double [[CALL]]
+; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn double @llvm.fabs.f64(double [[X]])
+; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn double @_Z4log2d(double [[__FABS]])
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[Y]] to double
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn double [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn double @_Z4exp2d(double [[__YLOGX]])
+; CHECK-NEXT:    [[__YTOU:%.*]] = zext i32 [[Y]] to i64
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i64 [[__YTOU]], 63
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double [[X]] to i64
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i64 [[__YEVEN]], [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[__EXP2]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64 [[TMP2]] to double
+; CHECK-NEXT:    ret double [[TMP3]]
 ;
 entry:
   %call = tail call nnan ninf afn double @_Z4powndi(double %x, i32 %y)
@@ -626,8 +658,19 @@
 ; CHECK-LABEL: define <2 x double> @test_pown_afn_nnan_ninf_v2f64
 ; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x double> @_Z4pownDv2_dDv2_i(<2 x double> [[X]], <2 x i32> [[Y]])
-; CHECK-NEXT:    ret <2 x double> [[CALL]]
+; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
+; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x double> @_Z4log2Dv2_d(<2 x double> [[__FABS]])
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x double>
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]])
+; CHECK-NEXT:    [[__YTOU1:%.*]] = zext <2 x i32> [[Y]] to <2 x i64>
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU1]], <i64 63, i64 63>
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[X]] to <2 x i64>
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i64> [[__YEVEN]], [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[__EXP2]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i64> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to <2 x double>
+; CHECK-NEXT:    ret <2 x double> [[TMP3]]
 ;
 entry:
   %call = tail call nnan ninf afn <2 x double> @_Z4pownDv2_dDv2_i(<2 x double> %x, <2 x i32> %y)
@@ -638,8 +681,19 @@
 ; CHECK-LABEL: define half @test_pown_afn_nnan_ninf_f16
 ; CHECK-SAME: (half [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn half @_Z4pownDhi(half [[X]], i32 [[Y]])
-; CHECK-NEXT:    ret half [[CALL]]
+; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn half @llvm.fabs.f16(half [[X]])
+; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn half @_Z4log2Dh(half [[__FABS]])
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp i32 [[Y]] to half
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn half [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn half @_Z4exp2Dh(half [[__YLOGX]])
+; CHECK-NEXT:    [[__YTOU:%.*]] = trunc i32 [[Y]] to i16
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl i16 [[__YTOU]], 15
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half [[X]] to i16
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
+; CHECK-NEXT:    ret half [[TMP3]]
 ;
 entry:
   %call = tail call nnan ninf afn half @_Z4pownDhi(half %x, i32 %y)
@@ -650,8 +704,19 @@
 ; CHECK-LABEL: define <2 x half> @test_pown_afn_nnan_ninf_v2f16
 ; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x half> @_Z4pownDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> [[Y]])
-; CHECK-NEXT:    ret <2 x half> [[CALL]]
+; CHECK-NEXT:    [[__FABS:%.*]] = call nnan ninf afn <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
+; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @_Z4log2Dv2_Dh(<2 x half> [[__FABS]])
+; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x half>
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]]
+; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @_Z4exp2Dv2_Dh(<2 x half> [[__YLOGX]])
+; CHECK-NEXT:    [[__YTOU:%.*]] = fptosi <2 x half> [[POWNI2F]] to <2 x i16>
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], <i16 15, i16 15>
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
+; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i16> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
+; CHECK-NEXT:    ret <2 x half> [[TMP3]]
 ;
 entry:
   %call = tail call nnan ninf afn <2 x half> @_Z4pownDv2_DhDv2_i(<2 x half> %x, <2 x i32> %y)
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
@@ -1003,8 +1003,10 @@
 define float @test_powr_afn_f32_nnan_ninf_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
 ; CHECK-LABEL: define float @test_powr_afn_f32_nnan_ninf_x_known_positive
 ; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call nnan ninf afn float @_Z4powrff(float [[X]], float [[Y]])
-; CHECK-NEXT:    ret float [[POWR]]
+; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[X]])
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y]]
+; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])
+; CHECK-NEXT:    ret float [[__EXP2]]
 ;
   %powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y)
   ret float %powr
@@ -1023,8 +1025,10 @@
 define <2 x float> @test_powr_afn_v2f32_nnan_ninf_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_nnan_ninf_x_known_positive
 ; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call nnan ninf afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
-; CHECK-NEXT:    ret <2 x float> [[POWR]]
+; CHECK-NEXT:    [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y]]
+; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])
+; CHECK-NEXT:    ret <2 x float> [[__EXP2]]
 ;
   %powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y)
   ret <2 x float> %powr