Index: include/llvm/Analysis/ValueTracking.h
===================================================================
--- include/llvm/Analysis/ValueTracking.h
+++ include/llvm/Analysis/ValueTracking.h
@@ -169,9 +169,15 @@
 
 /// Return true if we can prove that the specified FP value is either a NaN or
 /// never less than 0.0.
+/// -0.0 is not considered less than 0.0 here; see SignBitMustBeZero.
 bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI,
                                  unsigned Depth = 0);
 
+/// \returns true if we can prove that the specified FP value has a 0 sign
+/// bit.
+bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI,
+                       unsigned Depth = 0);
+
 /// If the specified value can be set by repeating the same byte in memory,
 /// return the i8 value that it is represented with. This is true for all i8
 /// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double
Index: lib/Analysis/InstructionSimplify.cpp
===================================================================
--- lib/Analysis/InstructionSimplify.cpp
+++ lib/Analysis/InstructionSimplify.cpp
@@ -4308,10 +4308,21 @@
     return nullptr;
 
   // Unary Ops
-  if (NumOperands == 1)
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
+  if (NumOperands == 1) {
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) {
       if (II->getIntrinsicID() == IID)
         return II;
+    }
+
+    switch (IID) {
+    case Intrinsic::fabs: {
+      if (SignBitMustBeZero(*ArgBegin, Q.TLI))
+        return *ArgBegin;
+    }
+    default:
+      break;
+    }
+  }
 
   return nullptr;
 }
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -2580,51 +2580,70 @@
   return false;
 }
 
-bool llvm::CannotBeOrderedLessThanZero(const Value *V,
-                                       const TargetLibraryInfo *TLI,
-                                       unsigned Depth) {
-  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
-    return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
+/// standard ordered compare, e.g. treat -0.0 as ordered less than 0.0 because
+/// of its sign bit, even though the two compare equal.
+static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
+                                            const TargetLibraryInfo *TLI,
+                                            bool SignBitOnly,
+                                            unsigned Depth) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    return !CFP->getValueAPF().isNegative() ||
+           (!SignBitOnly && CFP->getValueAPF().isZero());
+  }
 
   if (Depth == MaxDepth)
-    return false;  // Limit search depth.
+    return false; // Limit search depth.
 
   const Operator *I = dyn_cast<Operator>(V);
-  if (!I) return false;
+  if (!I)
+    return false;
 
   switch (I->getOpcode()) {
-  default: break;
+  default:
+    break;
   // Unsigned integers are always nonnegative.
   case Instruction::UIToFP:
     return true;
 
   case Instruction::FMul:
     // x*x is always non-negative or a NaN.
-    if (I->getOperand(0) == I->getOperand(1))
+    if (I->getOperand(0) == I->getOperand(1) &&
+        (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
       return true;
+    LLVM_FALLTHROUGH;
   case Instruction::FAdd:
   case Instruction::FDiv:
   case Instruction::FRem:
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1);
   case Instruction::Select:
-    return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                           Depth + 1);
   case Instruction::FPExt:
   case Instruction::FPTrunc:
     // Widening/narrowing never change sign.
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1);
   case Instruction::Call:
     Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI);
     switch (IID) {
     default:
       break;
     case Intrinsic::maxnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) ||
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) ||
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
     case Intrinsic::minnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) &&
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
     case Intrinsic::exp:
     case Intrinsic::exp2:
     case Intrinsic::fabs:
@@ -2636,18 +2655,31 @@
         if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
           return true;
       }
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1);
     case Intrinsic::fma:
     case Intrinsic::fmuladd:
       // x*x+y is non-negative if y is non-negative.
       return I->getOperand(0) == I->getOperand(1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                             Depth + 1);
     }
     break;
   }
   return false;
 }
 
+bool llvm::CannotBeOrderedLessThanZero(const Value *V,
+                                       const TargetLibraryInfo *TLI,
+                                       unsigned Depth) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, false, Depth);
+}
+
+bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI,
+                             unsigned Depth) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, true, Depth);
+}
+
 /// If the specified value can be set by repeating the same byte in memory,
 /// return the i8 value that it is represented with.  This is
 /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
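Note (illustrative, not part of the patch): the difference between the two entry points is easiest to see on a -0.0 constant, which CannotBeOrderedLessThanZero accepts but SignBitMustBeZero rejects. The sketch below assumes the caller already has an LLVMContext and a TargetLibraryInfo; the function name is made up for the example.

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical example: query both predicates on a -0.0 constant.
void contrastNegativeZero(LLVMContext &Ctx, const TargetLibraryInfo *TLI) {
  Constant *NegZero = ConstantFP::get(Type::getFloatTy(Ctx), -0.0);
  // -0.0 compares equal to 0.0, so it cannot be ordered less than zero.
  bool NotLessThanZero = CannotBeOrderedLessThanZero(NegZero, TLI); // true
  // Its sign bit is set, though, so the stricter query fails.
  bool SignBitClear = SignBitMustBeZero(NegZero, TLI);              // false
  (void)NotLessThanZero;
  (void)SignBitClear;
}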
Index: test/Transforms/InstCombine/fabs.ll
===================================================================
--- test/Transforms/InstCombine/fabs.ll
+++ test/Transforms/InstCombine/fabs.ll
@@ -74,7 +74,6 @@
 ; CHECK-NEXT: ret fp128 %fabsl
 }
 
-; TODO: This should be able to elimnated the fabs
 define float @square_nnan_fabs_intrinsic_f32(float %x) {
   %mul = fmul nnan float %x, %x
   %fabsf = call float @llvm.fabs.f32(float %mul)
@@ -82,8 +81,7 @@
 
 ; CHECK-LABEL: square_nnan_fabs_intrinsic_f32(
 ; CHECK-NEXT: %mul = fmul nnan float %x, %x
-; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %mul)
-; CHECK-NEXT: ret float %fabsf
+; CHECK-NEXT: ret float %mul
 }
 
 ; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
Index: test/Transforms/InstCombine/fast-math.ll
===================================================================
--- test/Transforms/InstCombine/fast-math.ll
+++ test/Transforms/InstCombine/fast-math.ll
@@ -241,7 +241,7 @@
 ; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses
 @fmul2_external = external global float
 define float @fmul2_disable(float %f1) {
-  %div = fdiv fast float 1.000000e+00, %f1 
+  %div = fdiv fast float 1.000000e+00, %f1
   store float %div, float* @fmul2_external
   %mul = fmul fast float %div, 2.000000e+00
   ret float %mul
@@ -672,8 +672,7 @@
 
 ; CHECK-LABEL: sqrt_intrinsic_arg_4th(
 ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
-; CHECK-NEXT: ret double %fabs
+; CHECK-NEXT: ret double %mul
 }
 
 define double @sqrt_intrinsic_arg_5th(double %x) {
@@ -685,9 +684,8 @@
 
 ; CHECK-LABEL: sqrt_intrinsic_arg_5th(
 ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
 ; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
-; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
 ; CHECK-NEXT: ret double %1
 }
 
Index: test/Transforms/InstSimplify/floating-point-arithmetic.ll
===================================================================
--- test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -103,3 +103,95 @@
   ret float %7
 }
 
+declare float @llvm.fabs.f32(float)
+
+; CHECK-LABEL: @fabs_select_positive_constants(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float 2.000000e+00
+; CHECK-NEXT: ret float %select
+define float @fabs_select_positive_constants(i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float 2.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_constant_variable(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float %x
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_constant_variable(i32 %c, float %x) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float %x
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_pos0(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+; CHECK-NEXT: ret float %fabs
+define float @fabs_select_neg0_pos0(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_neg1(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float -1.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_neg0_neg1(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float -1.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_nan_nan(
+; CHECK: %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+; CHECK-NEXT: ret float %select
+define float @fabs_select_nan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_nan(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negnan(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negnan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negzero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float -0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negzero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float -0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_zero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_zero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
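Note (illustrative, not part of the patch): the new query can also be used directly from other transforms. The helper below is hypothetical and simply mirrors the new Intrinsic::fabs case in the InstructionSimplify change above: if the operand's sign bit is already known to be zero, the fabs call is a no-op.

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include <cassert>

using namespace llvm;

// Hypothetical helper: return the operand if this @llvm.fabs call is a no-op,
// or nullptr if nothing is known about the operand's sign bit.
Value *foldRedundantFabs(IntrinsicInst *Fabs, const TargetLibraryInfo *TLI) {
  assert(Fabs->getIntrinsicID() == Intrinsic::fabs && "expected llvm.fabs");
  Value *Arg = Fabs->getArgOperand(0);
  // A nested fabs, an nnan x*x, or a uitofp result already has a clear sign
  // bit, so taking the absolute value again changes nothing.
  if (SignBitMustBeZero(Arg, TLI))
    return Arg;
  return nullptr;
}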