Index: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1397,7 +1397,8 @@ if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + + if (!CI->hasUnsafeAlgebra()) return Ret; Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); @@ -1406,7 +1407,7 @@ // We're looking for a repeated factor in a multiplication tree, // so we can do this fold: sqrt(x * x) -> fabs(x); - // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y). + // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y). Value *Op0 = I->getOperand(0); Value *Op1 = I->getOperand(1); Value *RepeatOp = nullptr; @@ -1421,6 +1422,7 @@ // variations of this pattern because instcombine's visitFMUL and/or the // reassociation pass should give us this form. Value *OtherMul0, *OtherMul1; + // FIXME: This multiply must be unsafe to allow this transform. if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { // Pattern: sqrt((x * y) * z) if (OtherMul0 == OtherMul1) { @@ -1435,8 +1437,6 @@ // Fast math flags for any created instructions should match the sqrt // and multiply. - // FIXME: We're not checking the sqrt because it doesn't have - // fast-math-flags (see earlier comment). 
IRBuilder<>::FastMathFlagGuard Guard(B); B.SetFastMathFlags(I->getFastMathFlags()); // If we found a repeated factor, hoist it out of the square root and Index: llvm/trunk/test/Transforms/InstCombine/fast-math.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/fast-math.ll +++ llvm/trunk/test/Transforms/InstCombine/fast-math.ll @@ -555,18 +555,12 @@ ; A squared factor fed into a square root intrinsic should be hoisted out ; as a fabs() value. -; We have to rely on a function-level attribute to enable this optimization -; because intrinsics don't currently have access to IR-level fast-math -; flags. If that changes, we can relax the requirement on all of these -; tests to just specify 'fast' on the sqrt. - -attributes #0 = { "unsafe-fp-math" = "true" } declare double @llvm.sqrt.f64(double) -define double @sqrt_intrinsic_arg_squared(double %x) #0 { +define double @sqrt_intrinsic_arg_squared(double %x) { %mul = fmul fast double %x, %x - %sqrt = call double @llvm.sqrt.f64(double %mul) + %sqrt = call fast double @llvm.sqrt.f64(double %mul) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_arg_squared( @@ -577,10 +571,10 @@ ; Check all 6 combinations of a 3-way multiplication tree where ; one factor is repeated. 
-define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args1(double %x, double %y) { %mul = fmul fast double %y, %x %mul2 = fmul fast double %mul, %x - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args1( @@ -590,10 +584,10 @@ ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args2(double %x, double %y) { %mul = fmul fast double %x, %y %mul2 = fmul fast double %mul, %x - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args2( @@ -603,10 +597,10 @@ ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args3(double %x, double %y) { %mul = fmul fast double %x, %x %mul2 = fmul fast double %mul, %y - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args3( @@ -616,10 +610,10 @@ ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args4(double %x, double %y) { %mul = fmul fast double %y, %x %mul2 = fmul fast double %x, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args4( @@ -629,10 +623,10 @@ ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args5(double %x, double %y) { %mul = fmul fast double %x, %y %mul2 = fmul fast double %x, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret 
double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args5( @@ -642,10 +636,10 @@ ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args6(double %x, double %y) { %mul = fmul fast double %x, %x %mul2 = fmul fast double %y, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args6( @@ -655,10 +649,10 @@ ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_arg_4th(double %x) #0 { +define double @sqrt_intrinsic_arg_4th(double %x) { %mul = fmul fast double %x, %x %mul2 = fmul fast double %mul, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_arg_4th( @@ -666,11 +660,11 @@ ; CHECK-NEXT: ret double %mul } -define double @sqrt_intrinsic_arg_5th(double %x) #0 { +define double @sqrt_intrinsic_arg_5th(double %x) { %mul = fmul fast double %x, %x %mul2 = fmul fast double %mul, %x %mul3 = fmul fast double %mul2, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul3) + %sqrt = call fast double @llvm.sqrt.f64(double %mul3) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_arg_5th( @@ -686,9 +680,9 @@ declare double @sqrt(double) declare fp128 @sqrtl(fp128) -define float @sqrt_call_squared_f32(float %x) #0 { +define float @sqrt_call_squared_f32(float %x) { %mul = fmul fast float %x, %x - %sqrt = call float @sqrtf(float %mul) + %sqrt = call fast float @sqrtf(float %mul) ret float %sqrt ; CHECK-LABEL: sqrt_call_squared_f32( @@ -696,9 +690,9 @@ ; CHECK-NEXT: ret float %fabs } -define double @sqrt_call_squared_f64(double %x) #0 { +define double @sqrt_call_squared_f64(double %x) { %mul = fmul fast double %x, %x - %sqrt = call double @sqrt(double %mul) + %sqrt = call fast double @sqrt(double %mul) ret double %sqrt ; CHECK-LABEL: sqrt_call_squared_f64( @@ -706,9 +700,9 @@ ; 
CHECK-NEXT: ret double %fabs } -define fp128 @sqrt_call_squared_f128(fp128 %x) #0 { +define fp128 @sqrt_call_squared_f128(fp128 %x) { %mul = fmul fast fp128 %x, %x - %sqrt = call fp128 @sqrtl(fp128 %mul) + %sqrt = call fast fp128 @sqrtl(fp128 %mul) ret fp128 %sqrt ; CHECK-LABEL: sqrt_call_squared_f128( Index: llvm/trunk/test/Transforms/InstCombine/inline-intrinsic-assert.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/inline-intrinsic-assert.ll +++ llvm/trunk/test/Transforms/InstCombine/inline-intrinsic-assert.ll @@ -4,7 +4,7 @@ ; The inliner should not add an edge to an intrinsic and ; then assert that it did not add an edge to an intrinsic! -define float @foo(float %f1) #0 { +define float @foo(float %f1) { %call = call float @bar(float %f1) ret float %call @@ -13,18 +13,16 @@ ; CHECK-NEXT: ret float } -define float @bar(float %f1) #0 { +define float @bar(float %f1) { %call = call float @sqr(float %f1) - %call1 = call float @sqrtf(float %call) #0 + %call1 = call fast float @sqrtf(float %call) ret float %call1 } -define float @sqr(float %f) #0 { +define float @sqr(float %f) { %mul = fmul fast float %f, %f ret float %mul } -declare float @sqrtf(float) #0 - -attributes #0 = { "unsafe-fp-math"="true" } +declare float @sqrtf(float) Index: llvm/trunk/test/Transforms/InstCombine/no_cgscc_assert.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/no_cgscc_assert.ll +++ llvm/trunk/test/Transforms/InstCombine/no_cgscc_assert.ll @@ -6,7 +6,7 @@ define float @bar(float %f) #0 { %mul = fmul fast float %f, %f - %call1 = call float @sqrtf(float %mul) #0 + %call1 = call fast float @sqrtf(float %mul) ret float %call1 ; CHECK-LABEL: @bar( @@ -14,6 +14,5 @@ ; CHECK-NEXT: ret float } -declare float @sqrtf(float) #0 +declare float @sqrtf(float) -attributes #0 = { readnone "unsafe-fp-math"="true" }