Index: llvm/lib/Analysis/TargetLibraryInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetLibraryInfo.cpp +++ llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1243,167 +1243,195 @@ case LibFunc_acos: case LibFunc_acos_finite: - case LibFunc_acosf: - case LibFunc_acosf_finite: case LibFunc_acosh: case LibFunc_acosh_finite: - case LibFunc_acoshf: - case LibFunc_acoshf_finite: - case LibFunc_acoshl: - case LibFunc_acoshl_finite: - case LibFunc_acosl: - case LibFunc_acosl_finite: case LibFunc_asin: case LibFunc_asin_finite: - case LibFunc_asinf: - case LibFunc_asinf_finite: case LibFunc_asinh: - case LibFunc_asinhf: - case LibFunc_asinhl: - case LibFunc_asinl: - case LibFunc_asinl_finite: case LibFunc_atan: - case LibFunc_atanf: case LibFunc_atanh: case LibFunc_atanh_finite: - case LibFunc_atanhf: - case LibFunc_atanhf_finite: - case LibFunc_atanhl: - case LibFunc_atanhl_finite: - case LibFunc_atanl: case LibFunc_cbrt: - case LibFunc_cbrtf: - case LibFunc_cbrtl: case LibFunc_ceil: - case LibFunc_ceilf: - case LibFunc_ceill: case LibFunc_cos: - case LibFunc_cosf: case LibFunc_cosh: case LibFunc_cosh_finite: - case LibFunc_coshf: - case LibFunc_coshf_finite: - case LibFunc_coshl: - case LibFunc_coshl_finite: - case LibFunc_cosl: case LibFunc_exp10: case LibFunc_exp10_finite: - case LibFunc_exp10f: - case LibFunc_exp10f_finite: - case LibFunc_exp10l: - case LibFunc_exp10l_finite: case LibFunc_exp2: case LibFunc_exp2_finite: - case LibFunc_exp2f: - case LibFunc_exp2f_finite: - case LibFunc_exp2l: - case LibFunc_exp2l_finite: case LibFunc_exp: case LibFunc_exp_finite: - case LibFunc_expf: - case LibFunc_expf_finite: - case LibFunc_expl: - case LibFunc_expl_finite: case LibFunc_expm1: - case LibFunc_expm1f: - case LibFunc_expm1l: case LibFunc_fabs: - case LibFunc_fabsf: - case LibFunc_fabsl: case LibFunc_floor: - case LibFunc_floorf: - case LibFunc_floorl: case LibFunc_log10: case LibFunc_log10_finite: - case LibFunc_log10f: - case LibFunc_log10f_finite: - case LibFunc_log10l: - case LibFunc_log10l_finite: case LibFunc_log1p: - case LibFunc_log1pf: - case LibFunc_log1pl: case LibFunc_log2: case LibFunc_log2_finite: - case LibFunc_log2f: - case LibFunc_log2f_finite: - case LibFunc_log2l: - case LibFunc_log2l_finite: case LibFunc_log: case LibFunc_log_finite: case LibFunc_logb: - case LibFunc_logbf: - case LibFunc_logbl: - case LibFunc_logf: - case LibFunc_logf_finite: - case LibFunc_logl: - case LibFunc_logl_finite: case LibFunc_nearbyint: - case LibFunc_nearbyintf: - case LibFunc_nearbyintl: case LibFunc_rint: - case LibFunc_rintf: - case LibFunc_rintl: case LibFunc_round: - case LibFunc_roundf: - case LibFunc_roundl: case LibFunc_sin: - case LibFunc_sinf: case LibFunc_sinh: case LibFunc_sinh_finite: - case LibFunc_sinhf: - case LibFunc_sinhf_finite: - case LibFunc_sinhl: - case LibFunc_sinhl_finite: - case LibFunc_sinl: case LibFunc_sqrt: case LibFunc_sqrt_finite: + case LibFunc_tan: + case LibFunc_tanh: + case LibFunc_trunc: + return (NumParams == 1 && FTy.getReturnType()->isDoubleTy() && + FTy.getReturnType() == FTy.getParamType(0)); + + case LibFunc_acosf: + case LibFunc_acosf_finite: + case LibFunc_acoshf: + case LibFunc_acoshf_finite: + case LibFunc_asinf: + case LibFunc_asinf_finite: + case LibFunc_asinhf: + case LibFunc_atanf: + case LibFunc_atanhf: + case LibFunc_atanhf_finite: + case LibFunc_cbrtf: + case LibFunc_ceilf: + case LibFunc_cosf: + case LibFunc_coshf: + case LibFunc_coshf_finite: + case LibFunc_exp10f: + case LibFunc_exp10f_finite: + case LibFunc_exp2f: + case LibFunc_exp2f_finite: + case LibFunc_expf: + case LibFunc_expf_finite: + case LibFunc_expm1f: + case LibFunc_fabsf: + case LibFunc_floorf: + case LibFunc_log10f: + case LibFunc_log10f_finite: + case LibFunc_log1pf: + case LibFunc_log2f: + case LibFunc_log2f_finite: + case LibFunc_logf: + case LibFunc_logf_finite: + case LibFunc_logbf: + case LibFunc_nearbyintf: + case LibFunc_rintf: + case LibFunc_roundf: + case LibFunc_sinf: + case LibFunc_sinhf: + case LibFunc_sinhf_finite: case LibFunc_sqrtf: case LibFunc_sqrtf_finite: - case LibFunc_sqrtl: - case LibFunc_sqrtl_finite: - case LibFunc_tan: case LibFunc_tanf: - case LibFunc_tanh: case LibFunc_tanhf: - case LibFunc_tanhl: - case LibFunc_tanl: - case LibFunc_trunc: case LibFunc_truncf: + return (NumParams == 1 && FTy.getReturnType()->isFloatTy() && + FTy.getReturnType() == FTy.getParamType(0)); + + case LibFunc_acoshl: + case LibFunc_acoshl_finite: + case LibFunc_acosl: + case LibFunc_acosl_finite: + case LibFunc_asinl: + case LibFunc_asinl_finite: + case LibFunc_asinhl: + case LibFunc_atanl: + case LibFunc_atanhl: + case LibFunc_atanhl_finite: + case LibFunc_cbrtl: + case LibFunc_ceill: + case LibFunc_cosl: + case LibFunc_coshl: + case LibFunc_coshl_finite: + case LibFunc_exp10l: + case LibFunc_exp10l_finite: + case LibFunc_exp2l: + case LibFunc_exp2l_finite: + case LibFunc_expl: + case LibFunc_expl_finite: + case LibFunc_expm1l: + case LibFunc_fabsl: + case LibFunc_floorl: + case LibFunc_log10l: + case LibFunc_log10l_finite: + case LibFunc_log1pl: + case LibFunc_log2l: + case LibFunc_log2l_finite: + case LibFunc_logl: + case LibFunc_logl_finite: + case LibFunc_logbl: + case LibFunc_nearbyintl: + case LibFunc_rintl: + case LibFunc_roundl: + case LibFunc_sinl: + case LibFunc_sinhl: + case LibFunc_sinhl_finite: + case LibFunc_sqrtl: + case LibFunc_sqrtl_finite: + case LibFunc_tanl: + case LibFunc_tanhl: case LibFunc_truncl: + // TODO: This should match 'long double' more precisely, but that might be + // platform-dependent. It can not be half/float/double? return (NumParams == 1 && FTy.getReturnType()->isFloatingPointTy() && FTy.getReturnType() == FTy.getParamType(0)); case LibFunc_atan2: case LibFunc_atan2_finite: - case LibFunc_atan2f: - case LibFunc_atan2f_finite: - case LibFunc_atan2l: - case LibFunc_atan2l_finite: case LibFunc_fmin: - case LibFunc_fminf: - case LibFunc_fminl: case LibFunc_fmax: - case LibFunc_fmaxf: - case LibFunc_fmaxl: case LibFunc_fmod: - case LibFunc_fmodf: - case LibFunc_fmodl: case LibFunc_copysign: - case LibFunc_copysignf: - case LibFunc_copysignl: case LibFunc_pow: case LibFunc_pow_finite: + return (NumParams == 2 && FTy.getReturnType()->isDoubleTy() && + FTy.getReturnType() == FTy.getParamType(0) && + FTy.getReturnType() == FTy.getParamType(1)); + + case LibFunc_atan2f: + case LibFunc_atan2f_finite: + case LibFunc_fminf: + case LibFunc_fmaxf: + case LibFunc_fmodf: + case LibFunc_copysignf: case LibFunc_powf: case LibFunc_powf_finite: + return (NumParams == 2 && FTy.getReturnType()->isFloatTy() && + FTy.getReturnType() == FTy.getParamType(0) && + FTy.getReturnType() == FTy.getParamType(1)); + + case LibFunc_atan2l: + case LibFunc_atan2l_finite: + case LibFunc_fminl: + case LibFunc_fmaxl: + case LibFunc_fmodl: + case LibFunc_copysignl: case LibFunc_powl: case LibFunc_powl_finite: + // TODO: This should match 'long double' more precisely, but that might be + // platform-dependent. It can not be half/float/double? return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() && FTy.getReturnType() == FTy.getParamType(0) && FTy.getReturnType() == FTy.getParamType(1)); case LibFunc_ldexp: + return (NumParams == 2 && FTy.getReturnType()->isDoubleTy() && + FTy.getReturnType() == FTy.getParamType(0) && + FTy.getParamType(1)->isIntegerTy(32)); + case LibFunc_ldexpf: + return (NumParams == 2 && FTy.getReturnType()->isFloatTy() && + FTy.getReturnType() == FTy.getParamType(0) && + FTy.getParamType(1)->isIntegerTy(32)); + case LibFunc_ldexpl: + // TODO: This should match 'long double' more precisely, but that might be + // platform-dependent. It can not be half/float/double? return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(1)->isIntegerTy(32)); Index: llvm/test/CodeGen/AMDGPU/complex-folding.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/complex-folding.ll +++ llvm/test/CodeGen/AMDGPU/complex-folding.ll @@ -5,7 +5,7 @@ define amdgpu_ps void @main(<4 x float> inreg %reg0) { entry: %0 = extractelement <4 x float> %reg0, i32 0 - %1 = call float @fabs(float %0) + %1 = call float @fabsf(float %0) %2 = fptoui float %1 to i32 %3 = bitcast i32 %2 to float %4 = insertelement <4 x float> undef, float %3, i32 0 @@ -13,5 +13,5 @@ ret void } -declare float @fabs(float ) readnone +declare float @fabsf(float) readnone declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) Index: llvm/test/CodeGen/AMDGPU/fabs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fabs.ll +++ llvm/test/CodeGen/AMDGPU/fabs.ll @@ -15,7 +15,7 @@ ; VI: s_bitset0_b32 s{{[0-9]+}}, 31 define amdgpu_kernel void @s_fabs_fn_free(float addrspace(1)* %out, i32 %in) { %bc= bitcast i32 %in to float - %fabs = call float @fabs(float %bc) + %fabs = call float @fabsf(float %bc) store float %fabs, float addrspace(1)* %out ret void } @@ -79,7 +79,7 @@ ; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[MUL_VAL]] ; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[ABS_VALUE]]|, [[V_MUL_VI]] define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) { - %fabs = call float @fabs(float %in0) + %fabs = call float @fabsf(float %in0) %fmul = fmul float %fabs, %in1 store float %fmul, float addrspace(1)* %out ret void @@ -110,7 +110,7 @@ ret void } -declare float @fabs(float) readnone +declare float @fabsf(float) readnone declare float @llvm.fabs.f32(float) readnone declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone Index: llvm/test/CodeGen/AMDGPU/floor.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/floor.ll +++ llvm/test/CodeGen/AMDGPU/floor.ll @@ -3,12 +3,12 @@ ; CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define amdgpu_ps void @test(<4 x float> inreg %reg0) { %r0 = extractelement <4 x float> %reg0, i32 0 - %r1 = call float @floor(float %r0) + %r1 = call float @floorf(float %r0) %vec = insertelement <4 x float> undef, float %r1, i32 0 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @floor(float) readonly +declare float @floorf(float) readonly declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) Index: llvm/test/CodeGen/AMDGPU/fneg-fabs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-fabs.ll +++ llvm/test/CodeGen/AMDGPU/fneg-fabs.ll @@ -52,7 +52,7 @@ ; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 define amdgpu_kernel void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float - %fabs = call float @fabs(float %bc) + %fabs = call float @fabsf(float %bc) %fsub = fsub float -0.000000e+00, %fabs store float %fsub, float addrspace(1)* %out ret void @@ -107,7 +107,7 @@ ret void } -declare float @fabs(float) readnone +declare float @fabsf(float) readnone declare float @llvm.fabs.f32(float) readnone declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone Index: llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll +++ llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll @@ -15,7 +15,7 @@ %tmp11 = extractelement <4 x float> %tmp9, i32 1 %tmp12 = extractelement <4 x float> %tmp9, i32 2 %tmp13 = extractelement <4 x float> %tmp9, i32 3 - %tmp14 = call float @fabs(float %tmp12) + %tmp14 = call float @fabsf(float %tmp12) %tmp15 = fdiv float 1.000000e+00, %tmp14 %tmp16 = fmul float %tmp10, %tmp15 %tmp17 = fadd float %tmp16, 1.500000e+00 @@ -48,7 +48,7 @@ declare <4 x float> @llvm.r600.cube(<4 x float>) #0 ; Function Attrs: readnone -declare float @fabs(float) #0 +declare float @fabsf(float) #0 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) Index: llvm/test/CodeGen/AMDGPU/schedule-if-2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/schedule-if-2.ll +++ llvm/test/CodeGen/AMDGPU/schedule-if-2.ll @@ -17,7 +17,7 @@ br i1 %10, label %IF, label %ELSE IF: ; preds = %main_body - %11 = call float @fabs(float %2) + %11 = call float @fabsf(float %2) %12 = fcmp ueq float %11, 0x7FF0000000000000 %13 = select i1 %12, float 1.000000e+00, float 0.000000e+00 %14 = fsub float -0.000000e+00, %13 @@ -87,7 +87,7 @@ br label %ENDIF } -declare float @fabs(float) #0 +declare float @fabsf(float) #0 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) Index: llvm/test/Transforms/InstCombine/sqrt-nofast.ll =================================================================== --- llvm/test/Transforms/InstCombine/sqrt-nofast.ll +++ llvm/test/Transforms/InstCombine/sqrt-nofast.ll @@ -25,15 +25,15 @@ declare float @llvm.sqrt.f32(float) #1 -; FIXME: ; This is a function called "sqrtf", but its type is double. ; Assume it is a user function rather than a libm function, ; so don't transform it. define double @fake_sqrt(double %a, double %b) { ; CHECK-LABEL: @fake_sqrt( -; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[A:%.*]]) -; CHECK-NEXT: ret double [[FABS]] +; CHECK-NEXT: [[C:%.*]] = fmul fast double [[A:%.*]], [[A]] +; CHECK-NEXT: [[E:%.*]] = call fast double @sqrtf(double [[C]]) #1 +; CHECK-NEXT: ret double [[E]] ; %c = fmul fast double %a, %a %e = call fast double @sqrtf(double %c) readnone