Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14852,14 +14852,6 @@ static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { - // This optimization is guarded by a function attribute because it may produce - // unexpected results. Ie, programs may be relying on the platform-specific - // undefined behavior when the float-to-int conversion overflows. - const Function &F = DAG.getMachineFunction().getFunction(); - Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow"); - if (StrictOverflow.getValueAsString().equals("false")) - return SDValue(); - // We only do this if the target has legal ftrunc. Otherwise, we'd likely be // replacing casts with a libcall. We also must be allowed to ignore -0.0 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer Index: llvm/test/CodeGen/X86/ftrunc.ll =================================================================== --- llvm/test/CodeGen/X86/ftrunc.ll +++ llvm/test/CodeGen/X86/ftrunc.ll @@ -3,6 +3,9 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +declare i32 @llvm.fptoui.sat.i32.f32(float) +declare i64 @llvm.fptosi.sat.i64.f64(double) + define float @trunc_unsigned_f32(float %x) #0 { ; SSE2-LABEL: trunc_unsigned_f32: ; SSE2: # %bb.0: @@ -483,43 +486,117 @@ ret <4 x double> %r } -; The fold may be guarded to allow existing code to continue -; working based on its assumptions of float->int overflow. +; The FTRUNC ("round**" x86 asm) fold relies on UB in the case of overflow. +; This used to be guarded with an attribute check. 
That allowed existing +; code to continue working based on its assumption that float->int +; overflow had saturating behavior. +; +; Now, we expect a front-end to use the saturating IR intrinsics +; (llvm.fptoui.sat / llvm.fptosi.sat) if it wants to avoid this transform. define float @trunc_unsigned_f32_disable_via_attr(float %x) #1 { -; SSE-LABEL: trunc_unsigned_f32_disable_via_attr: +; SSE2-LABEL: trunc_unsigned_f32_disable_via_attr: +; SSE2: # %bb.0: +; SSE2-NEXT: cvttss2si %xmm0, %rax +; SSE2-NEXT: movl %eax, %eax +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: trunc_unsigned_f32_disable_via_attr: +; SSE41: # %bb.0: +; SSE41-NEXT: roundss $11, %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_unsigned_f32_disable_via_attr: +; AVX1: # %bb.0: +; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: retq + %i = fptoui float %x to i32 + %r = uitofp i32 %i to float + ret float %r +} + +define double @trunc_signed_f64_disable_via_attr(double %x) #1 { +; SSE2-LABEL: trunc_signed_f64_disable_via_attr: +; SSE2: # %bb.0: +; SSE2-NEXT: cvttsd2si %xmm0, %rax +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2sd %rax, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: trunc_signed_f64_disable_via_attr: +; SSE41: # %bb.0: +; SSE41-NEXT: roundsd $11, %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_signed_f64_disable_via_attr: +; AVX1: # %bb.0: +; AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: retq + %i = fptosi double %x to i64 + %r = sitofp i64 %i to double + ret double %r +} + +define float @trunc_unsigned_f32_disable_via_intrinsic(float %x) #1 { +; SSE-LABEL: trunc_unsigned_f32_disable_via_intrinsic: ; SSE: # %bb.0: ; SSE-NEXT: cvttss2si %xmm0, %rax -; SSE-NEXT: movl %eax, %eax +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: cmovael %eax, %ecx +; SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: movl $-1, %eax +; SSE-NEXT: cmovbel %ecx, %eax ; SSE-NEXT: xorps 
%xmm0, %xmm0 ; SSE-NEXT: cvtsi2ss %rax, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: trunc_unsigned_f32_disable_via_attr: +; AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic: ; AVX1: # %bb.0: ; AVX1-NEXT: vcvttss2si %xmm0, %rax -; AVX1-NEXT: movl %eax, %eax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0 +; AVX1-NEXT: xorl %ecx, %ecx +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vucomiss %xmm1, %xmm0 +; AVX1-NEXT: cmovael %eax, %ecx +; AVX1-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX1-NEXT: movl $-1, %eax +; AVX1-NEXT: cmovbel %ecx, %eax +; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 ; AVX1-NEXT: retq - %i = fptoui float %x to i32 + %i = call i32 @llvm.fptoui.sat.i32.f32(float %x) %r = uitofp i32 %i to float ret float %r } -define double @trunc_signed_f64_disable_via_attr(double %x) #1 { -; SSE-LABEL: trunc_signed_f64_disable_via_attr: +define double @trunc_signed_f64_disable_via_intrinsic(double %x) #1 { +; SSE-LABEL: trunc_signed_f64_disable_via_intrinsic: ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si %xmm0, %rax +; SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; SSE-NEXT: cmovbeq %rax, %rcx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: ucomisd %xmm0, %xmm0 +; SSE-NEXT: cmovnpq %rcx, %rax ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2sd %rax, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: trunc_signed_f64_disable_via_attr: +; AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic: ; AVX1: # %bb.0: ; AVX1-NEXT: vcvttsd2si %xmm0, %rax +; AVX1-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX1-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; AVX1-NEXT: cmovbeq %rax, %rcx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: vucomisd %xmm0, %xmm0 +; AVX1-NEXT: cmovnpq %rcx, %rax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0 ; AVX1-NEXT: retq - %i = fptosi double %x to i64 + %i = call i64 @llvm.fptosi.sat.i64.f64(double %x) %r = sitofp i64 %i to double ret double %r }