diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23232,7 +23232,7 @@
   }
 
   if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
-      Subtarget.hasFP16()) {
+      Subtarget.hasFP16() && Reciprocal) {
     if (RefinementSteps == ReciprocalEstimate::Unspecified)
       RefinementSteps = 0;
 
diff --git a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
--- a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
@@ -969,6 +969,15 @@
   ret <8 x half> %2
 }
 
+define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) {
+; CHECK-LABEL: test_sqrt_ph_128_fast2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsqrtph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
+  ret <8 x half> %1
+}
+
 define <8 x half> @test_mask_sqrt_ph_128(<8 x half> %a0, <8 x half> %passthru, i8 %mask) {
 ; CHECK-LABEL: test_mask_sqrt_ph_128:
 ; CHECK:       # %bb.0:
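
Note (not part of the patch above): the added "&& Reciprocal" guard restricts the FP16 case in X86TargetLowering::getSqrtEstimate to reciprocal-square-root requests, so a plain sqrt marked fast now selects vsqrtph directly, as the new test checks. As a hedged sketch of the case that should still take the estimate path, the IR below uses a hypothetical function name and carries no generated CHECK lines; under fast-math, 1.0 / sqrt(x) is queried with Reciprocal == true and is therefore still expected to use the FP16 estimate (e.g. vrsqrtph) rather than vsqrtph followed by a divide.

define <8 x half> @rsqrt_ph_128_fast(<8 x half> %a0) {
  ; 1.0 / sqrt(x) with fast-math: the DAG combiner asks getSqrtEstimate
  ; for a reciprocal square root, so the FP16 path still applies.
  %s = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
  %r = fdiv fast <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %s
  ret <8 x half> %r
}

declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)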