Index: include/llvm/Analysis/ValueTracking.h =================================================================== --- include/llvm/Analysis/ValueTracking.h +++ include/llvm/Analysis/ValueTracking.h @@ -160,7 +160,8 @@ /// Map a call instruction to an intrinsic ID. Libcalls which have equivalent /// intrinsics are treated as-if they were intrinsics. Intrinsic::ID getIntrinsicForCallSite(ImmutableCallSite ICS, - const TargetLibraryInfo *TLI); + const TargetLibraryInfo *TLI, + bool forVector = false); /// Return true if we can prove that the specified FP value is never equal to /// -0.0. Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -2422,7 +2422,8 @@ } Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + bool forVector) { const Function *F = ICS.getCalledFunction(); if (!F) return Intrinsic::not_intrinsic; @@ -2523,7 +2524,7 @@ case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: - if (ICS->hasNoNaNs()) + if (ICS->hasNoNaNs() || forVector) return Intrinsic::sqrt; return Intrinsic::not_intrinsic; } Index: lib/Analysis/VectorUtils.cpp =================================================================== --- lib/Analysis/VectorUtils.cpp +++ lib/Analysis/VectorUtils.cpp @@ -85,7 +85,7 @@ /// its ID, in case it does not found it return not_intrinsic. Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI) { - Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI); + Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI, /*forVector*/ true); if (ID == Intrinsic::not_intrinsic) return Intrinsic::not_intrinsic; Index: test/CodeGen/X86/vector-sqrt.ll =================================================================== --- test/CodeGen/X86/vector-sqrt.ll +++ test/CodeGen/X86/vector-sqrt.ll @@ -1,60 +1,33 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK -; Function Attrs: nounwind readonly uwtable define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 { ; CHECK-LABEL: sqrtd2: -; CHECK: vsqrtsd (%rdi), %xmm0, %xmm0 -; CHECK-NEXT: vsqrtsd 8(%rdi), %xmm1, %xmm1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: retq +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vsqrtpd (%rdi), %xmm0 +; CHECK-NEXT: retq entry: - %0 = load double, double* %v, align 8 - %call = tail call double @sqrt(double %0) #2 - %arrayidx1 = getelementptr inbounds double, double* %v, i64 1 - %1 = load double, double* %arrayidx1, align 8 - %call2 = tail call double @sqrt(double %1) #2 - %vecinit.i = insertelement <2 x double> undef, double %call, i32 0 - %vecinit1.i = insertelement <2 x double> %vecinit.i, double %call2, i32 1 - ret <2 x double> %vecinit1.i + %0 = bitcast double* %v to <2 x double>* + %1 = load <2 x double>, <2 x double>* %0, align 8 + %2 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %1) + ret <2 x double> %2 } -; Function Attrs: nounwind readnone -declare double @sqrt(double) local_unnamed_addr #1 - -; Function Attrs: nounwind readonly uwtable define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 { ; CHECK-LABEL: sqrtf4: -; CHECK: vsqrtss (%rdi), %xmm0, %xmm0 -; CHECK-NEXT: vsqrtss 4(%rdi), %xmm1, %xmm1 -; CHECK-NEXT: vsqrtss 8(%rdi), %xmm2, %xmm2 -; CHECK-NEXT: vsqrtss 12(%rdi), %xmm3, %xmm3 -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0] -; CHECK-NEXT: retq +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vsqrtps (%rdi), %xmm0 +; CHECK-NEXT: retq entry: - %0 = load float, float* %v, align 4 - %call = tail call float @sqrtf(float %0) #2 - %arrayidx1 = getelementptr inbounds float, float* %v, i64 1 - %1 = load float, float* %arrayidx1, align 4 - %call2 = tail call float @sqrtf(float %1) #2 - %arrayidx3 = getelementptr inbounds float, float* %v, i64 2 - %2 = load float, float* %arrayidx3, align 4 - %call4 = tail call float @sqrtf(float %2) #2 - %arrayidx5 = getelementptr inbounds float, float* %v, i64 3 - %3 = load float, float* %arrayidx5, align 4 - %call6 = tail call float @sqrtf(float %3) #2 - %vecinit.i = insertelement <4 x float> undef, float %call, i32 0 - %vecinit1.i = insertelement <4 x float> %vecinit.i, float %call2, i32 1 - %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %call4, i32 2 - %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %call6, i32 3 - ret <4 x float> %vecinit3.i + %0 = bitcast float* %v to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %1) + ret <4 x float> %2 } -; Function Attrs: nounwind readnone -declare float @sqrtf(float) local_unnamed_addr #1 +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #1 +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1 + +attributes #0 = { nounwind readonly uwtable "target-features"="+avx2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } -attributes #0 = { nounwind readonly uwtable "target-features"="+avx" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "target-features"="+avx2" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone }