Index: lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp =================================================================== --- lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -43,7 +43,7 @@ // // (after) // v0 = sqrt_noreadmem(src) # native sqrt instruction. - // if (v0 is a NaN) + // if (!(src >= 0)) # src is negative or a NaN. // v1 = sqrt(src) # library call. // dst = phi(v0, v1) // @@ -69,7 +69,8 @@ Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); CurrBB.getTerminator()->eraseFromParent(); Builder.SetInsertPoint(&CurrBB); - Value *FCmp = Builder.CreateFCmpOEQ(Call, Call); + Value *FCmp = Builder.CreateFCmpOGE(Call->getOperand(0), + ConstantFP::get(Call->getType(), 0.0)); Builder.CreateCondBr(FCmp, JoinBB, LibCallBB); // Add phi operands. Index: test/CodeGen/Mips/optimize-fp-math.ll =================================================================== --- test/CodeGen/Mips/optimize-fp-math.ll +++ test/CodeGen/Mips/optimize-fp-math.ll @@ -3,11 +3,11 @@ ; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64 ; 32-LABEL: test_sqrtf_float_: -; 32: sqrt.s $f[[R0:[0-9]+]], $f{{[0-9]+}} -; 32: c.un.s $f[[R0]], $f[[R0]] +; 32: c.ult.s $f[[R0:[0-9]+]], $f[[R1:[0-9]+]] +; 32: sqrt.s $f[[R1]], $f[[R0]] ; 64-LABEL: test_sqrtf_float_: -; 64: sqrt.s $f[[R0:[0-9]+]], $f{{[0-9]+}} -; 64: c.un.s $f[[R0]], $f[[R0]] +; 64: c.ult.s $f[[R0:[0-9]+]], $f[[R1:[0-9]+]] +; 64: sqrt.s $f[[R1]], $f[[R0]] define float @test_sqrtf_float_(float %a) { entry: @@ -18,11 +18,11 @@ declare float @sqrtf(float) ; 32-LABEL: test_sqrt_double_: -; 32: sqrt.d $f[[R0:[0-9]+]], $f{{[0-9]+}} -; 32: c.un.d $f[[R0]], $f[[R0]] +; 32: c.ult.d $f[[R0:[0-9]+]], $f[[R1:[0-9]+]] +; 32: sqrt.d $f[[R1]], $f[[R0]] ; 64-LABEL: test_sqrt_double_: -; 64: sqrt.d $f[[R0:[0-9]+]], $f{{[0-9]+}} -; 64: c.un.d $f[[R0]], $f[[R0]] +; 64: c.ult.d $f[[R0:[0-9]+]], $f[[R1:[0-9]+]] +; 64: sqrt.d $f[[R1]], $f[[R0]] define double @test_sqrt_double_(double %a) { 
entry: Index: test/CodeGen/SystemZ/fp-sqrt-01.ll =================================================================== --- test/CodeGen/SystemZ/fp-sqrt-01.ll +++ test/CodeGen/SystemZ/fp-sqrt-01.ll @@ -157,9 +157,10 @@ ; Check that a call to the normal sqrtf function is lowered. define float @f8(float %dummy, float %val) { ; CHECK-LABEL: f8: +; CHECK: ltebr %f2, %f2 +; CHECK: jnh ; CHECK: sqebr %f0, %f2 -; CHECK: cebr %f0, %f0 -; CHECK: bnor %r14 +; CHECK: br %r14 ; CHECK: ler %f0, %f2 ; CHECK: jg sqrtf@PLT %res = tail call float @sqrtf(float %val) Index: test/CodeGen/SystemZ/fp-sqrt-02.ll =================================================================== --- test/CodeGen/SystemZ/fp-sqrt-02.ll +++ test/CodeGen/SystemZ/fp-sqrt-02.ll @@ -159,9 +159,10 @@ ; Check that a call to the normal sqrt function is lowered. define double @f8(double %dummy, double %val) { ; CHECK-LABEL: f8: +; CHECK: ltdbr %f{{.}}, %f2 +; CHECK: jnh ; CHECK: sqdbr %f0, %f2 -; CHECK: cdbr %f0, %f0 -; CHECK: bnor %r14 +; CHECK: br %r14 ; CHECK: ldr %f0, %f2 ; CHECK: jg sqrt@PLT %res = tail call double @sqrt(double %val) Index: test/CodeGen/X86/sqrt-partial.ll =================================================================== --- test/CodeGen/X86/sqrt-partial.ll +++ test/CodeGen/X86/sqrt-partial.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 | FileCheck %s + +define float @f(float %val) nounwind { +; CHECK-LABEL: f: +; CHECK: # BB#0: +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jb .LBB0_2 +; CHECK-NEXT: # BB#1: # %.split +; CHECK-NEXT: sqrtss %xmm0, %xmm0 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %call.sqrt +; CHECK-NEXT: jmp sqrtf # TAILCALL + %res = tail call float @sqrtf(float %val) + ret float %res +} + +define double @d(double %val) nounwind { +; CHECK-LABEL: d: +; CHECK: # BB#0: +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomisd %xmm1, 
%xmm0 +; CHECK-NEXT: jb .LBB1_2 +; CHECK-NEXT: # BB#1: # %.split +; CHECK-NEXT: sqrtsd %xmm0, %xmm0 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_2: # %call.sqrt +; CHECK-NEXT: jmp sqrt # TAILCALL + %res = tail call double @sqrt(double %val) + ret double %res +} + +declare float @sqrtf(float) +declare double @sqrt(double) Index: test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll =================================================================== --- test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll +++ test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll @@ -5,7 +5,7 @@ ; CHECK: @f ; CHECK: entry: ; CHECK-NEXT: %[[RES:.+]] = tail call float @sqrtf(float %val) #0 -; CHECK-NEXT: %[[CMP:.+]] = fcmp oeq float %[[RES]], %[[RES]] +; CHECK-NEXT: %[[CMP:.+]] = fcmp oge float %val, 0.000000e+00 ; CHECK-NEXT: br i1 %[[CMP]], label %[[EXIT:.+]], label %[[CALL:.+]] ; CHECK: [[CALL]]: ; CHECK-NEXT: %[[RES2:.+]] = tail call float @sqrtf(float %val){{$}}