Index: include/llvm/IR/PatternMatch.h
===================================================================
--- include/llvm/IR/PatternMatch.h
+++ include/llvm/IR/PatternMatch.h
@@ -1043,6 +1043,18 @@
   return CastClass_match<OpTy, Instruction::SIToFP>(Op);
 }
 
+/// Matches FPToSI.
+template <typename OpTy>
+inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::FPToSI>(Op);
+}
+
+/// Matches FPToUI.
+template <typename OpTy>
+inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::FPToUI>(Op);
+}
+
 /// Matches FPTrunc
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1728,10 +1728,43 @@
 }
 
+/// Return true if 'inttofp (fptoint X)', where \p CI is the outer cast, can be
+/// replaced with a call to llvm.trunc(X).
+static bool canFoldFPRoundTripToTrunc(const CastInst &CI, const Value *X) {
+  // The width of the intermediate integer type does not matter, but the fold
+  // is only a truncation if the value returns to its original FP type.
+  if (X->getType() != CI.getType())
+    return false;
+
+  // An input in (-1.0, -0.0] is converted to integer 0 and then to +0.0, but
+  // llvm.trunc returns -0.0 for it, so the sign of zero must be ignorable.
+  return CI.getFunction()
+             ->getFnAttribute("no-signed-zeros-fp-math")
+             .getValueAsString() == "true";
+}
+
 Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+  Value *X;
+  if (match(CI.getOperand(0), m_FPToUI(m_Value(X))) &&
+      canFoldFPRoundTripToTrunc(CI, X)) {
+    // fptoui rounds towards zero, so this is the same as libm 'trunc':
+    // uitofp (fptoui X) --> llvm.trunc(X)
+    Value *Trunc = Builder.CreateIntrinsic(Intrinsic::trunc, { X }, &CI);
+    return replaceInstUsesWith(CI, Trunc);
+  }
+
   return commonCastTransforms(CI);
 }
 
 Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+  Value *X;
+  if (match(CI.getOperand(0), m_FPToSI(m_Value(X))) &&
+      canFoldFPRoundTripToTrunc(CI, X)) {
+    // fptosi rounds towards zero, so this is the same as libm 'trunc':
+    // sitofp (fptosi X) --> llvm.trunc(X)
+    Value *Trunc = Builder.CreateIntrinsic(Intrinsic::trunc, { X }, &CI);
+    return replaceInstUsesWith(CI, Trunc);
+  }
+
   return commonCastTransforms(CI);
 }
 
Index: test/Transforms/InstCombine/sitofp.ll
===================================================================
--- test/Transforms/InstCombine/sitofp.ll
+++ test/Transforms/InstCombine/sitofp.ll
@@ -216,3 +216,99 @@
   ret i55 %C
 }
 
+; Casting to integer and back to the same type with rounding towards zero is llvm.trunc(),
+; but only when the sign of zero can be ignored: an input in (-1.0, -0.0] becomes +0.0
+; through the casts, while llvm.trunc() returns -0.0.
+; PR36617: https://bugs.llvm.org/show_bug.cgi?id=36617
+
+define float @trunc_signed_f32(float %x) #0 {
+; CHECK-LABEL: @trunc_signed_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.trunc.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %i = fptosi float %x to i32
+  %r = sitofp i32 %i to float
+  ret float %r
+}
+
+; The intermediate type does not matter. If the first cast is out-of-range, that's UB.
+
+define double @trunc_signed_f64(double %x) #0 {
+; CHECK-LABEL: @trunc_signed_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.trunc.f64(double [[X:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %i = fptosi double %x to i8
+  %r = sitofp i8 %i to double
+  ret double %r
+}
+
+; Vector types work too.
+
+define <2 x half> @trunc_signed_v2f16(<2 x half> %x) #0 {
+; CHECK-LABEL: @trunc_signed_v2f16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x half> @llvm.trunc.v2f16(<2 x half> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x half> [[TMP1]]
+;
+  %i = fptosi <2 x half> %x to <2 x i32>
+  %r = sitofp <2 x i32> %i to <2 x half>
+  ret <2 x half> %r
+}
+
+; Casting to unsigned integer and back to the same type with rounding towards zero is still llvm.trunc().
+
+define <2 x float> @trunc_unsigned_v2f32(<2 x float> %x) #0 {
+; CHECK-LABEL: @trunc_unsigned_v2f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x float> [[TMP1]]
+;
+  %i = fptoui <2 x float> %x to <2 x i232>
+  %r = uitofp <2 x i232> %i to <2 x float>
+  ret <2 x float> %r
+}
+
+define fp128 @trunc_unsigned_f128(fp128 %x) #0 {
+; CHECK-LABEL: @trunc_unsigned_f128(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fp128 @llvm.trunc.f128(fp128 [[X:%.*]])
+; CHECK-NEXT:    ret fp128 [[TMP1]]
+;
+  %i = fptoui fp128 %x to i128
+  %r = uitofp i128 %i to fp128
+  ret fp128 %r
+}
+
+define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: @trunc_unsigned_v2f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %i = fptoui <2 x double> %x to <2 x i16>
+  %r = uitofp <2 x i16> %i to <2 x double>
+  ret <2 x double> %r
+}
+
+; Negative tests: signed zeros are not ignorable here, so the casts must be preserved.
+
+define float @trunc_signed_f32_signed_zeros(float %x) {
+; CHECK-LABEL: @trunc_signed_f32_signed_zeros(
+; CHECK-NEXT:    [[I:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[I]] to float
+; CHECK-NEXT:    ret float [[R]]
+;
+  %i = fptosi float %x to i32
+  %r = sitofp i32 %i to float
+  ret float %r
+}
+
+define double @trunc_unsigned_f64_signed_zeros(double %x) {
+; CHECK-LABEL: @trunc_unsigned_f64_signed_zeros(
+; CHECK-NEXT:    [[I:%.*]] = fptoui double [[X:%.*]] to i64
+; CHECK-NEXT:    [[R:%.*]] = uitofp i64 [[I]] to double
+; CHECK-NEXT:    ret double [[R]]
+;
+  %i = fptoui double %x to i64
+  %r = uitofp i64 %i to double
+  ret double %r
+}
+
+attributes #0 = { "no-signed-zeros-fp-math"="true" }