Relax the fast-math requirement of the InstSimplify fold
  sqrt(X) * sqrt(X) --> X
from the full 'fast' flag set to exactly the flags it depends on
(reassoc, nnan, nsz), and add negative tests showing that dropping any
one of the three flags blocks the fold.

Index: lib/Analysis/InstructionSimplify.cpp
===================================================================
--- lib/Analysis/InstructionSimplify.cpp
+++ lib/Analysis/InstructionSimplify.cpp
@@ -4239,10 +4239,13 @@
   if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
     return Op1;
 
-  // sqrt(X) * sqrt(X) --> X
+  // sqrt(X) * sqrt(X) --> X, if we can:
+  // 1. Remove the intermediate rounding (reassociate).
+  // 2. Ignore non-zero negative numbers because sqrt would produce NAN.
+  // 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0.
   Value *X;
-  if (FMF.isFast() && Op0 == Op1 &&
-      match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))))
+  if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) &&
+      FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros())
     return X;
 
   return nullptr;
Index: test/Transforms/InstSimplify/fast-math.ll
===================================================================
--- test/Transforms/InstSimplify/fast-math.ll
+++ test/Transforms/InstSimplify/fast-math.ll
@@ -205,7 +205,7 @@
 }
 
 ; PR21126: http://llvm.org/bugs/show_bug.cgi?id=21126
-; With unsafe/fast math, sqrt(X) * sqrt(X) is just X.
+; With loose math, sqrt(X) * sqrt(X) is just X.
 
 declare double @llvm.sqrt.f64(double)
 
@@ -214,7 +214,42 @@
 ; CHECK-NEXT:    ret double [[F:%.*]]
 ;
   %sqrt = call double @llvm.sqrt.f64(double %f)
-  %mul = fmul fast double %sqrt, %sqrt
+  %mul = fmul reassoc nnan nsz double %sqrt, %sqrt
+  ret double %mul
+}
+
+; Negative tests for the above transform: we need all 3 of those flags.
+
+define double @sqrt_squared_not_fast_enough1(double %f) {
+; CHECK-LABEL: @sqrt_squared_not_fast_enough1(
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[F:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan nsz double [[SQRT]], [[SQRT]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %sqrt = call double @llvm.sqrt.f64(double %f)
+  %mul = fmul nnan nsz double %sqrt, %sqrt
+  ret double %mul
+}
+
+define double @sqrt_squared_not_fast_enough2(double %f) {
+; CHECK-LABEL: @sqrt_squared_not_fast_enough2(
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[F:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc nnan double [[SQRT]], [[SQRT]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %sqrt = call double @llvm.sqrt.f64(double %f)
+  %mul = fmul reassoc nnan double %sqrt, %sqrt
+  ret double %mul
+}
+
+define double @sqrt_squared_not_fast_enough3(double %f) {
+; CHECK-LABEL: @sqrt_squared_not_fast_enough3(
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[F:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc nsz double [[SQRT]], [[SQRT]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %sqrt = call double @llvm.sqrt.f64(double %f)
+  %mul = fmul reassoc nsz double %sqrt, %sqrt
   ret double %mul
 }
 