Index: include/llvm/CodeGen/CommandFlags.h =================================================================== --- include/llvm/CodeGen/CommandFlags.h +++ include/llvm/CodeGen/CommandFlags.h @@ -144,6 +144,12 @@ cl::init(false)); cl::opt +EnableNoSignedZerosFPMath("enable-no-signed-zeros-fp-math", + cl::desc("Enable FP math optimizations that assume " + "the sign of 0 is insignificant"), + cl::init(false)); + +cl::opt EnableNoTrappingFPMath("enable-no-trapping-fp-math", cl::desc("Enable setting the FP exceptions build " "attribute not to use exceptions"), @@ -282,6 +288,7 @@ Options.UnsafeFPMath = EnableUnsafeFPMath; Options.NoInfsFPMath = EnableNoInfsFPMath; Options.NoNaNsFPMath = EnableNoNaNsFPMath; + Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath; Options.NoTrappingFPMath = EnableNoTrappingFPMath; Options.FPDenormalMode = DenormalMode; Options.HonorSignDependentRoundingFPMathOption = Index: include/llvm/Target/TargetOptions.h =================================================================== --- include/llvm/Target/TargetOptions.h +++ include/llvm/Target/TargetOptions.h @@ -153,11 +153,17 @@ /// assume the FP arithmetic arguments and results are never NaNs. unsigned NoNaNsFPMath : 1; - /// NoTrappingFPMath - This flag is enabled when the - /// -enable-no-trapping-fp-math is specified on the command line. This + /// NoTrappingFPMath - This flag is enabled when the + /// -enable-no-trapping-fp-math is specified on the command line. This /// specifies that there are no trap handlers to handle exceptions. unsigned NoTrappingFPMath : 1; + /// NoSignedZerosFPMath - This flag is enabled when the + /// -enable-no-signed-zeros-fp-math is specified on the command line. This + /// specifies that optimizations are allowed to treat the sign of a zero + /// argument or result as insignificant. + unsigned NoSignedZerosFPMath : 1; + /// HonorSignDependentRoundingFPMath - This returns true when the /// -enable-sign-dependent-rounding-fp-math is specified. 
If this returns /// false (the default), the code generator is allowed to assume that the Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -635,7 +635,8 @@ Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros()) + if (!Options->NoSignedZerosFPMath && + !Op.getNode()->getFlags()->hasNoSignedZeros()) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) Index: lib/Target/TargetMachine.cpp =================================================================== --- lib/Target/TargetMachine.cpp +++ lib/Target/TargetMachine.cpp @@ -84,6 +84,7 @@ RESET_OPTION(UnsafeFPMath, "unsafe-fp-math"); RESET_OPTION(NoInfsFPMath, "no-infs-fp-math"); RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); + RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math"); RESET_OPTION(NoTrappingFPMath, "no-trapping-math"); StringRef Denormal = Index: test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s +; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s +; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s + +; Test that the -enable-no-signed-zeros-fp-math flag works + +; GCN-LABEL: {{^}}fneg_fsub_f32: +; GCN: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} +; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]] + +; GCN-UNSAFE-NOT: xor +define void @fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { + 
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 + %a = load float, float addrspace(1)* %in, align 4 + %b = load float, float addrspace(1)* %b_ptr, align 4 + %result = fsub float %a, %b + %neg.result = fsub float -0.0, %result + store float %neg.result, float addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/fsub.ll =================================================================== --- test/CodeGen/AMDGPU/fsub.ll +++ test/CodeGen/AMDGPU/fsub.ll @@ -69,3 +69,61 @@ store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 ret void } + +; FUNC-LABEL: {{^}}v_fneg_fsub_f32: +; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} +; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]] +define void @v_fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) { + %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 + %a = load float, float addrspace(1)* %in, align 4 + %b = load float, float addrspace(1)* %b_ptr, align 4 + %result = fsub float %a, %b + %neg.result = fsub float -0.0, %result + store float %neg.result, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_f32: +; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} +; SI-NOT: xor +define void @v_fneg_fsub_nsz_f32(float addrspace(1)* %out, float addrspace(1)* %in) { + %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 + %a = load float, float addrspace(1)* %in, align 4 + %b = load float, float addrspace(1)* %b_ptr, align 4 + %result = fsub nsz float %a, %b + %neg.result = fsub float -0.0, %result + store float %neg.result, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32: +; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} +; SI-NOT: xor +define void @v_fneg_fsub_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { + %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 + 
%a = load float, float addrspace(1)* %in, align 4 + %b = load float, float addrspace(1)* %b_ptr, align 4 + %result = fsub float %a, %b + %neg.result = fsub float -0.0, %result + store float %neg.result, float addrspace(1)* %out, align 4 + ret void +} + +; The attribute is a string whose value is "true" or "false", not a bare +; flag, so check that the fold stays disabled and the fneg is not folded +; when the value is not "true". +; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32: +; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} +; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]] +define void @v_fneg_fsub_nsz_false_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #1 { + %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 + %a = load float, float addrspace(1)* %in, align 4 + %b = load float, float addrspace(1)* %b_ptr, align 4 + %result = fsub float %a, %b + %neg.result = fsub float -0.0, %result + store float %neg.result, float addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" } +attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" } Index: test/CodeGen/X86/negative-sin.ll =================================================================== --- test/CodeGen/X86/negative-sin.ll +++ test/CodeGen/X86/negative-sin.ll @@ -101,5 +101,5 @@ ret double %h } -attributes #0 = { "unsafe-fp-math"="true" } +attributes #0 = { "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" }