Index: llvm/trunk/lib/Analysis/InstructionSimplify.cpp =================================================================== --- llvm/trunk/lib/Analysis/InstructionSimplify.cpp +++ llvm/trunk/lib/Analysis/InstructionSimplify.cpp @@ -4249,10 +4249,13 @@ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP())) return ConstantFP::getNullValue(Op0->getType()); - // sqrt(X) * sqrt(X) --> X + // sqrt(X) * sqrt(X) --> X, if we can: + // 1. Remove the intermediate rounding (reassociate). + // 2. Ignore non-zero negative numbers because sqrt would produce NAN. + // 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0. Value *X; - if (FMF.isFast() && Op0 == Op1 && - match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(X)))) + if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && + FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros()) return X; return nullptr; Index: llvm/trunk/test/Transforms/InstSimplify/fast-math.ll =================================================================== --- llvm/trunk/test/Transforms/InstSimplify/fast-math.ll +++ llvm/trunk/test/Transforms/InstSimplify/fast-math.ll @@ -323,7 +323,7 @@ } ; PR21126: http://llvm.org/bugs/show_bug.cgi?id=21126 -; With unsafe/fast math, sqrt(X) * sqrt(X) is just X. +; With loose math, sqrt(X) * sqrt(X) is just X. declare double @llvm.sqrt.f64(double) @@ -332,7 +332,42 @@ ; CHECK-NEXT: ret double [[F:%.*]] ; %sqrt = call double @llvm.sqrt.f64(double %f) - %mul = fmul fast double %sqrt, %sqrt + %mul = fmul reassoc nnan nsz double %sqrt, %sqrt + ret double %mul +} + +; Negative tests for the above transform: we need all 3 of those flags. 
+ +define double @sqrt_squared_not_fast_enough1(double %f) { +; CHECK-LABEL: @sqrt_squared_not_fast_enough1( +; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[F:%.*]]) +; CHECK-NEXT: [[MUL:%.*]] = fmul nnan nsz double [[SQRT]], [[SQRT]] +; CHECK-NEXT: ret double [[MUL]] +; + %sqrt = call double @llvm.sqrt.f64(double %f) + %mul = fmul nnan nsz double %sqrt, %sqrt + ret double %mul +} + +define double @sqrt_squared_not_fast_enough2(double %f) { +; CHECK-LABEL: @sqrt_squared_not_fast_enough2( +; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[F:%.*]]) +; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc nnan double [[SQRT]], [[SQRT]] +; CHECK-NEXT: ret double [[MUL]] +; + %sqrt = call double @llvm.sqrt.f64(double %f) + %mul = fmul reassoc nnan double %sqrt, %sqrt + ret double %mul +} + +define double @sqrt_squared_not_fast_enough3(double %f) { +; CHECK-LABEL: @sqrt_squared_not_fast_enough3( +; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[F:%.*]]) +; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc nsz double [[SQRT]], [[SQRT]] +; CHECK-NEXT: ret double [[MUL]] +; + %sqrt = call double @llvm.sqrt.f64(double %f) + %mul = fmul reassoc nsz double %sqrt, %sqrt ret double %mul }