diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -245,6 +245,30 @@
 TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
 TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
 
+TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2)
+TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4)
+TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8)
+
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4)
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8)
+TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16)
+
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2)
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4)
+TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8)
+
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4)
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8)
+TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
+
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
@@ -243,3 +243,60 @@
 !71 = distinct !{!71, !72, !73}
 !72 = !{!"llvm.loop.vectorize.width", i32 4}
 !73 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+declare float @__log2f_finite(float) #0
+
+; CHECK-LABEL: @log2_f32
+; CHECK: <4 x float> @__svml_log2f4
+; CHECK: ret
+define void @log2_f32(float* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @__log2f_finite(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!81 = distinct !{!81, !82, !83}
+!82 = !{!"llvm.loop.vectorize.width", i32 4}
+!83 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+
+declare double @__log2_finite(double) #0
+
+; CHECK-LABEL: @log2_f64
+; CHECK: <4 x double> @__svml_log24
+; CHECK: ret
+define void @log2_f64(double* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call fast double @__log2_finite(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
+  store double %call, double* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!91 = distinct !{!91, !92, !93}
+!92 = !{!"llvm.loop.vectorize.width", i32 4}
+!93 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
@@ -28,6 +28,11 @@
 declare double @llvm.log.f64(double) #0
 declare float @llvm.log.f32(float) #0
 
+declare double @log2(double) #0
+declare float @log2f(float) #0
+declare double @llvm.log2.f64(double) #0
+declare float @llvm.log2.f32(float) #0
+
 declare double @exp2(double) #0
 declare float @exp2f(float) #0
 declare double @llvm.exp2.f64(double) #0
@@ -501,6 +506,98 @@
   ret void
 }
 
+define void @log2_f64(double* nocapture %varray) {
+; CHECK-LABEL: @log2_f64(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @log2(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log2_f32(float* nocapture %varray) {
+; CHECK-LABEL: @log2_f32(
+; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @log2f(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log2_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @log2_f64_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.log2.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log2_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @log2_f32_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.log2.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
 define void @exp2_f64(double* nocapture %varray) {
 ; CHECK-LABEL: @exp2_f64(
 ; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])