Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11065,9 +11065,14 @@ return SDValue(); // We only do this if the target has legal ftrunc. Otherwise, we'd likely be - // replacing casts with a libcall. + // replacing casts with a libcall. We also must be allowed to ignore -0.0 + // because FTRUNC will return -0.0 for (-1.0, -0.0], but using integer + // conversions would return +0.0. + // FIXME: We should be able to use node-level FMF here. + // TODO: If strict math, should we use FABS (+ range check for signed cast)? EVT VT = N->getValueType(0); - if (!TLI.isOperationLegal(ISD::FTRUNC, VT)) + if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || + !DAG.getTarget().Options.NoSignedZerosFPMath) return SDValue(); // fptosi/fptoui round towards zero, so converting from FP to integer and Index: test/CodeGen/AArch64/ftrunc.ll =================================================================== --- test/CodeGen/AArch64/ftrunc.ll +++ test/CodeGen/AArch64/ftrunc.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s -define float @trunc_unsigned_f32(float %x) { +define float @trunc_unsigned_f32(float %x) #0 { ; CHECK-LABEL: trunc_unsigned_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintz s0, s0 @@ -11,7 +11,7 @@ ret float %r } -define double @trunc_unsigned_f64(double %x) { +define double @trunc_unsigned_f64(double %x) #0 { ; CHECK-LABEL: trunc_unsigned_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: frintz d0, d0 @@ -21,7 +21,7 @@ ret double %r } -define float @trunc_signed_f32(float %x) { +define float @trunc_signed_f32(float %x) #0 { ; CHECK-LABEL: trunc_signed_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintz s0, s0 @@ -31,7 +31,7 @@ ret float %r } -define double @trunc_signed_f64(double 
%x) #0 { ; CHECK-LABEL: trunc_signed_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: frintz d0, d0 @@ -41,3 +41,5 @@ ret double %r } +attributes #0 = { "no-signed-zeros-fp-math"="true" } + Index: test/CodeGen/PowerPC/fp-int128-fp-combine.ll =================================================================== --- test/CodeGen/PowerPC/fp-int128-fp-combine.ll +++ test/CodeGen/PowerPC/fp-int128-fp-combine.ll @@ -2,9 +2,35 @@ ; RUN: llc -O0 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s ; xscvdpsxds should NOT be emitted, since it saturates the result down to i64. +; We can't use friz here because it may return -0.0 where the original code doesn't. + define float @f_i128_f(float %v) { ; CHECK-LABEL: f_i128_f: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -32(1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: nop +; CHECK-NEXT: bl __floattisf +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 32 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +entry: + %a = fptosi float %v to i128 + %b = sitofp i128 %a to float + ret float %b +} + +; NSZ, so it's safe to friz. 
+ +define float @f_i128_fi_nsz(float %v) #0 { +; CHECK-LABEL: f_i128_fi_nsz: +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: friz 1, 1 ; CHECK-NEXT: blr entry: @@ -12,3 +38,6 @@ %b = sitofp i128 %a to float ret float %b } + +attributes #0 = { "no-signed-zeros-fp-math"="true" } + Index: test/CodeGen/PowerPC/fp-to-int-to-fp.ll =================================================================== --- test/CodeGen/PowerPC/fp-to-int-to-fp.ll +++ test/CodeGen/PowerPC/fp-to-int-to-fp.ll @@ -62,5 +62,5 @@ ; FPCVT: blr } -attributes #0 = { nounwind readnone } +attributes #0 = { nounwind readnone "no-signed-zeros-fp-math"="true" } Index: test/CodeGen/PowerPC/ftrunc-vec.ll =================================================================== --- test/CodeGen/PowerPC/ftrunc-vec.ll +++ test/CodeGen/PowerPC/ftrunc-vec.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | FileCheck %s -define <4 x float> @truncf32(<4 x float> %a) { +define <4 x float> @truncf32(<4 x float> %a) #0 { ; CHECK-LABEL: truncf32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvrspiz 34, 34 @@ -11,7 +11,7 @@ ret <4 x float> %t1 } -define <2 x double> @truncf64(<2 x double> %a) { +define <2 x double> @truncf64(<2 x double> %a) #0 { ; CHECK-LABEL: truncf64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvrdpiz 34, 34 @@ -21,7 +21,7 @@ ret <2 x double> %t1 } -define <4 x float> @truncf32u(<4 x float> %a) { +define <4 x float> @truncf32u(<4 x float> %a) #0 { ; CHECK-LABEL: truncf32u: ; CHECK: # %bb.0: ; CHECK-NEXT: xvrspiz 34, 34 @@ -31,7 +31,7 @@ ret <4 x float> %t1 } -define <2 x double> @truncf64u(<2 x double> %a) { +define <2 x double> @truncf64u(<2 x double> %a) #0 { ; CHECK-LABEL: truncf64u: ; CHECK: # %bb.0: ; CHECK-NEXT: xvrdpiz 34, 34 @@ -41,3 +41,5 @@ ret <2 x double> %t1 } +attributes #0 = { "no-signed-zeros-fp-math"="true" } + Index: test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll 
=================================================================== --- test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll +++ test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll @@ -76,5 +76,5 @@ ; CHECK: blr } -attributes #0 = { nounwind readonly } +attributes #0 = { nounwind readonly "no-signed-zeros-fp-math"="true" } Index: test/CodeGen/X86/2011-10-19-widen_vselect.ll =================================================================== --- test/CodeGen/X86/2011-10-19-widen_vselect.ll +++ test/CodeGen/X86/2011-10-19-widen_vselect.ll @@ -71,7 +71,8 @@ ; X32-NEXT: subl $60, %esp ; X32-NEXT: .cfi_def_cfa_offset 64 ; X32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; X32-NEXT: roundps $11, %xmm2, %xmm1 +; X32-NEXT: cvttps2dq %xmm2, %xmm0 +; X32-NEXT: cvtdq2ps %xmm0, %xmm1 ; X32-NEXT: xorps %xmm0, %xmm0 ; X32-NEXT: cmpltps %xmm2, %xmm0 ; X32-NEXT: movaps {{.*#+}} xmm3 = <1,1,u,u> @@ -93,7 +94,8 @@ ; X64-LABEL: full_test: ; X64: # %bb.0: # %entry ; X64-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; X64-NEXT: roundps $11, %xmm2, %xmm1 +; X64-NEXT: cvttps2dq %xmm2, %xmm0 +; X64-NEXT: cvtdq2ps %xmm0, %xmm1 ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: cmpltps %xmm2, %xmm0 ; X64-NEXT: movaps {{.*#+}} xmm3 = <1,1,u,u> Index: test/CodeGen/X86/avx-cvttp2si.ll =================================================================== --- test/CodeGen/X86/avx-cvttp2si.ll +++ test/CodeGen/X86/avx-cvttp2si.ll @@ -8,7 +8,7 @@ declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) -define <8 x float> @float_to_int_to_float_mem_v8f32(<8 x float>* %p) { +define <8 x float> @float_to_int_to_float_mem_v8f32(<8 x float>* %p) #0 { ; AVX-LABEL: float_to_int_to_float_mem_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq (%rdi), %ymm0 @@ -20,7 +20,7 @@ ret <8 x float> %sitofp } -define <8 x float> @float_to_int_to_float_reg_v8f32(<8 x float> %x) { +define <8 x float> @float_to_int_to_float_reg_v8f32(<8 x float> %x) #0 { ; AVX-LABEL: 
float_to_int_to_float_reg_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 @@ -31,7 +31,7 @@ ret <8 x float> %sitofp } -define <4 x double> @float_to_int_to_float_mem_v4f64(<4 x double>* %p) { +define <4 x double> @float_to_int_to_float_mem_v4f64(<4 x double>* %p) #0 { ; AVX-LABEL: float_to_int_to_float_mem_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dqy (%rdi), %xmm0 @@ -43,7 +43,7 @@ ret <4 x double> %sitofp } -define <4 x double> @float_to_int_to_float_reg_v4f64(<4 x double> %x) { +define <4 x double> @float_to_int_to_float_reg_v4f64(<4 x double> %x) #0 { ; AVX-LABEL: float_to_int_to_float_reg_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 @@ -54,3 +54,5 @@ ret <4 x double> %sitofp } +attributes #0 = { "no-signed-zeros-fp-math"="true" } + Index: test/CodeGen/X86/ftrunc.ll =================================================================== --- test/CodeGen/X86/ftrunc.ll +++ test/CodeGen/X86/ftrunc.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1 -define float @trunc_unsigned_f32(float %x) nounwind { +define float @trunc_unsigned_f32(float %x) #0 { ; SSE2-LABEL: trunc_unsigned_f32: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttss2si %xmm0, %rax @@ -26,7 +26,7 @@ ret float %r } -define double @trunc_unsigned_f64(double %x) nounwind { +define double @trunc_unsigned_f64(double %x) #0 { ; SSE2-LABEL: trunc_unsigned_f64: ; SSE2: # %bb.0: ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero @@ -59,7 +59,7 @@ ret double %r } -define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) nounwind { +define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 { ; SSE2-LABEL: trunc_unsigned_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 @@ -102,7 +102,7 @@ ret <4 x float> %r } -define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) nounwind { +define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) 
#0 { ; SSE2-LABEL: trunc_unsigned_v2f64: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 @@ -152,7 +152,7 @@ ret <2 x double> %r } -define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) nounwind { +define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 { ; SSE2-LABEL: trunc_unsigned_v4f64: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm1, %xmm3 @@ -230,7 +230,7 @@ ret <4 x double> %r } -define float @trunc_signed_f32(float %x) nounwind { +define float @trunc_signed_f32(float %x) #0 { ; SSE2-LABEL: trunc_signed_f32: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttss2si %xmm0, %eax @@ -252,7 +252,7 @@ ret float %r } -define double @trunc_signed_f64(double %x) nounwind { +define double @trunc_signed_f64(double %x) #0 { ; SSE2-LABEL: trunc_signed_f64: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttsd2si %xmm0, %rax @@ -274,7 +274,7 @@ ret double %r } -define <4 x float> @trunc_signed_v4f32(<4 x float> %x) nounwind { +define <4 x float> @trunc_signed_v4f32(<4 x float> %x) #0 { ; SSE2-LABEL: trunc_signed_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttps2dq %xmm0, %xmm0 @@ -295,7 +295,7 @@ ret <4 x float> %r } -define <2 x double> @trunc_signed_v2f64(<2 x double> %x) nounwind { +define <2 x double> @trunc_signed_v2f64(<2 x double> %x) #0 { ; SSE2-LABEL: trunc_signed_v2f64: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttsd2si %xmm0, %rax @@ -321,7 +321,7 @@ ret <2 x double> %r } -define <4 x double> @trunc_signed_v4f64(<4 x double> %x) nounwind { +define <4 x double> @trunc_signed_v4f64(<4 x double> %x) #0 { ; SSE2-LABEL: trunc_signed_v4f64: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttsd2si %xmm1, %rax @@ -412,5 +412,6 @@ ret double %r } -attributes #1 = { nounwind "strict-float-cast-overflow"="false" } +attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" } +attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "strict-float-cast-overflow"="false" } Index: test/CodeGen/X86/sse-cvttp2si.ll =================================================================== --- test/CodeGen/X86/sse-cvttp2si.ll +++ 
test/CodeGen/X86/sse-cvttp2si.ll @@ -13,7 +13,7 @@ declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) -define float @float_to_int_to_float_mem_f32_i32(<4 x float>* %p) { +define float @float_to_int_to_float_mem_f32_i32(<4 x float>* %p) #0 { ; SSE-LABEL: float_to_int_to_float_mem_f32_i32: ; SSE: # %bb.0: ; SSE-NEXT: cvttss2si (%rdi), %eax @@ -31,7 +31,7 @@ ret float %sitofp } -define float @float_to_int_to_float_reg_f32_i32(<4 x float> %x) { +define float @float_to_int_to_float_reg_f32_i32(<4 x float> %x) #0 { ; SSE-LABEL: float_to_int_to_float_reg_f32_i32: ; SSE: # %bb.0: ; SSE-NEXT: cvttss2si %xmm0, %eax @@ -49,7 +49,7 @@ ret float %sitofp } -define float @float_to_int_to_float_mem_f32_i64(<4 x float>* %p) { +define float @float_to_int_to_float_mem_f32_i64(<4 x float>* %p) #0 { ; SSE-LABEL: float_to_int_to_float_mem_f32_i64: ; SSE: # %bb.0: ; SSE-NEXT: cvttss2si (%rdi), %rax @@ -67,7 +67,7 @@ ret float %sitofp } -define float @float_to_int_to_float_reg_f32_i64(<4 x float> %x) { +define float @float_to_int_to_float_reg_f32_i64(<4 x float> %x) #0 { ; SSE-LABEL: float_to_int_to_float_reg_f32_i64: ; SSE: # %bb.0: ; SSE-NEXT: cvttss2si %xmm0, %rax @@ -85,7 +85,7 @@ ret float %sitofp } -define double @float_to_int_to_float_mem_f64_i32(<2 x double>* %p) { +define double @float_to_int_to_float_mem_f64_i32(<2 x double>* %p) #0 { ; SSE-LABEL: float_to_int_to_float_mem_f64_i32: ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si (%rdi), %eax @@ -103,7 +103,7 @@ ret double %sitofp } -define double @float_to_int_to_float_reg_f64_i32(<2 x double> %x) { +define double @float_to_int_to_float_reg_f64_i32(<2 x double> %x) #0 { ; SSE-LABEL: float_to_int_to_float_reg_f64_i32: ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si %xmm0, %eax @@ -121,7 +121,7 @@ ret double %sitofp } -define double @float_to_int_to_float_mem_f64_i64(<2 x double>* %p) { +define double @float_to_int_to_float_mem_f64_i64(<2 x double>* %p) #0 { ; SSE-LABEL: 
float_to_int_to_float_mem_f64_i64: ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si (%rdi), %rax @@ -139,7 +139,7 @@ ret double %sitofp } -define double @float_to_int_to_float_reg_f64_i64(<2 x double> %x) { +define double @float_to_int_to_float_reg_f64_i64(<2 x double> %x) #0 { ; SSE-LABEL: float_to_int_to_float_reg_f64_i64: ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si %xmm0, %rax @@ -157,7 +157,7 @@ ret double %sitofp } -define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) { +define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) #0 { ; SSE-LABEL: float_to_int_to_float_mem_v4f32: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq (%rdi), %xmm0 @@ -175,7 +175,7 @@ ret <4 x float> %sitofp } -define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) { +define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) #0 { ; SSE-LABEL: float_to_int_to_float_reg_v4f32: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 @@ -192,7 +192,7 @@ ret <4 x float> %sitofp } -define <2 x double> @float_to_int_to_float_mem_v2f64(<2 x double>* %p) { +define <2 x double> @float_to_int_to_float_mem_v2f64(<2 x double>* %p) #0 { ; SSE-LABEL: float_to_int_to_float_mem_v2f64: ; SSE: # %bb.0: ; SSE-NEXT: cvttpd2dq (%rdi), %xmm0 @@ -211,7 +211,7 @@ ret <2 x double> %sitofp } -define <2 x double> @float_to_int_to_float_reg_v2f64(<2 x double> %x) { +define <2 x double> @float_to_int_to_float_reg_v2f64(<2 x double> %x) #0 { ; SSE-LABEL: float_to_int_to_float_reg_v2f64: ; SSE: # %bb.0: ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 @@ -229,3 +229,5 @@ ret <2 x double> %sitofp } +attributes #0 = { "no-signed-zeros-fp-math"="true" } +