Index: clang/include/clang/Basic/CodeGenOptions.h
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.h
+++ clang/include/clang/Basic/CodeGenOptions.h
@@ -54,11 +54,11 @@
   enum VectorLibrary {
     NoLibrary,  // Don't use any vector library.
     Accelerate, // Use the Accelerate framework.
+    LIBMVEC,    // GLIBC vector math library.
     MASSV,      // IBM MASS vector library.
     SVML        // Intel short vector math library.
   };
-
 
   enum ObjCDispatchMethodKind {
     Legacy = 0,
     NonLegacy = 1,
Index: clang/include/clang/Basic/CodeGenOptions.def
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.def
+++ clang/include/clang/Basic/CodeGenOptions.def
@@ -349,7 +349,7 @@
 ENUM_CODEGENOPT(Inlining, InliningMethod, 2, NormalInlining)
 
 // Vector functions library to use.
-ENUM_CODEGENOPT(VecLib, VectorLibrary, 2, NoLibrary)
+ENUM_CODEGENOPT(VecLib, VectorLibrary, 3, NoLibrary)
 
 /// The default TLS model to use.
 ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel)
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1579,7 +1579,7 @@
   Group, Flags<[CC1Option]>,
   HelpText<"Disables an experimental new pass manager in LLVM.">;
 def fveclib : Joined<["-"], "fveclib=">, Group, Flags<[CC1Option]>,
-  HelpText<"Use the given vector functions library">, Values<"Accelerate,MASSV,SVML,none">;
+  HelpText<"Use the given vector functions library">, Values<"Accelerate,libmvec,MASSV,SVML,none">;
 def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group,
   Alias, AliasArgs<["none"]>;
 def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group,
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -371,6 +371,16 @@
   case CodeGenOptions::Accelerate:
     TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate);
     break;
+  case CodeGenOptions::LIBMVEC:
+    switch(TargetTriple.getArch()) {
+    default:
+      break;
+    case llvm::Triple::x86_64:
+      TLII->addVectorizableFunctionsFromVecLib(
+          TargetLibraryInfoImpl::LIBMVEC_X86);
+      break;
+    }
+    break;
   case CodeGenOptions::MASSV:
     TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV);
     break;
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -749,6 +749,8 @@
     StringRef Name = A->getValue();
     if (Name == "Accelerate")
       Opts.setVecLib(CodeGenOptions::Accelerate);
+    else if (Name == "libmvec")
+      Opts.setVecLib(CodeGenOptions::LIBMVEC);
     else if (Name == "MASSV")
       Opts.setVecLib(CodeGenOptions::MASSV);
     else if (Name == "SVML")
Index: clang/test/Driver/autocomplete.c
===================================================================
--- clang/test/Driver/autocomplete.c
+++ clang/test/Driver/autocomplete.c
@@ -73,6 +73,7 @@
 // FLTOALL-NEXT: thin
 // RUN: %clang --autocomplete=-fveclib= | FileCheck %s -check-prefix=FVECLIBALL
 // FVECLIBALL: Accelerate
+// FVECLIBALL-NEXT: libmvec
 // FVECLIBALL-NEXT: MASSV
 // FVECLIBALL-NEXT: none
 // FVECLIBALL-NEXT: SVML
Index: llvm/include/llvm/Analysis/TargetLibraryInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -88,6 +88,7 @@
   enum VectorLibrary {
     NoLibrary,   // Don't use any vector library.
     Accelerate,  // Use Accelerate framework.
+    LIBMVEC_X86, // GLIBC Vector Math library.
     MASSV,       // IBM MASS vector library.
     SVML         // Intel short vector math library.
   };
Index: llvm/include/llvm/Analysis/VecFuncs.def
===================================================================
--- llvm/include/llvm/Analysis/VecFuncs.def
+++ llvm/include/llvm/Analysis/VecFuncs.def
@@ -62,6 +62,87 @@
 TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
 
+
+#elif defined(TLI_DEFINE_LIBMVEC_X86_VECFUNCS)
+// GLIBC Vector math Functions
+
+TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", 2)
+TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", 4)
+
+TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", 4)
+TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", 8)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", 2)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", 4)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", 8)
+
+TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", 2)
+TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", 4)
+
+TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", 4)
+TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", 8)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", 2)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", 4)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", 8)
+
+TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", 2)
+TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", 4)
+
+TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", 4)
+TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", 8)
+
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", 2)
+TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", 4)
+
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", 4)
+TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", 8)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", 2)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", 4)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", 8)
+
+TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", 2)
+TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", 4)
+
+TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", 4)
+TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", 8)
+
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", 2)
+TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", 4)
+
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", 4)
+TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", 8)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", 2)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", 4)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", 8)
+
+TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", 2)
+TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", 4)
+
+TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", 4)
+TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", 8)
+
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", 2)
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", 4)
+
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", 4)
+TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", 2)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", 4)
+
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", 8) + #elif defined(TLI_DEFINE_MASSV_VECFUNCS) // IBM MASS library's vector Functions @@ -339,6 +420,7 @@ #undef TLI_DEFINE_VECFUNC #undef TLI_DEFINE_ACCELERATE_VECFUNCS +#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS #undef TLI_DEFINE_SVML_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS_NAMES Index: llvm/lib/Analysis/TargetLibraryInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetLibraryInfo.cpp +++ llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -24,6 +24,8 @@ "No vector functions library"), clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", "Accelerate framework"), + clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86", + "GLIBC Vector Math library"), clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV", "IBM MASS vector library"), clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", @@ -1559,6 +1561,14 @@ addVectorizableFunctions(VecFuncs); break; } + case LIBMVEC_X86: { + const VecDesc VecFuncs[] = { + #define TLI_DEFINE_LIBMVEC_X86_VECFUNCS + #include "llvm/Analysis/VecFuncs.def" + }; + addVectorizableFunctions(VecFuncs); + break; + } case MASSV: { const VecDesc VecFuncs[] = { #define TLI_DEFINE_MASSV_VECFUNCS Index: llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll @@ -0,0 +1,484 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare float @__expf_finite(float) #0 + +define void @exp_f32(float* nocapture %varray) { +; CHECK-LABEL: @exp_f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[VEC_IND2]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = sitofp <4 x i32> [[STEP_ADD3]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call fast <4 x float> @_ZGVbN4v___expf_finite(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = call fast <4 x float> @_ZGVbN4v___expf_finite(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, 
float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[STEP_ADD3]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @__expf_finite(float [[CONV]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[CALL]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__expf_finite(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: ; preds = %for.body + ret void +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__exp_finite(double) #0 + +define void @exp_f64(double* nocapture %varray) { +; CHECK-LABEL: @exp_f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND1]] to <4 x double> +; CHECK-NEXT: 
[[TMP5:%.*]] = call fast <4 x double> @_ZGVdN4v___exp_finite(<4 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call fast double @__exp_finite(double [[CONV]]) [[ATTR1:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store double [[CALL]], double* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__exp_finite(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %call, double* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11 + +for.end: ; preds = %for.body + ret void +} + +!11 = distinct !{!11, !12, !13} +!12 = !{!"llvm.loop.vectorize.width", i32 4} +!13 = !{!"llvm.loop.vectorize.enable", i1 true} + + + + +declare float @__logf_finite(float) #0 + +define void @log_f32(float* nocapture %varray) { +; CHECK-LABEL: @log_f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; 
CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[VEC_IND2]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = sitofp <4 x i32> [[STEP_ADD3]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call fast <4 x float> @_ZGVbN4v___logf_finite(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = call fast <4 x float> @_ZGVbN4v___logf_finite(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[STEP_ADD3]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @__logf_finite(float [[CONV]]) [[ATTR2:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[CALL]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__logf_finite(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: ; preds = %for.body + ret void +} + +!21 = distinct !{!21, !22, !23} +!22 = !{!"llvm.loop.vectorize.width", i32 4} +!23 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__log_finite(double) #0 + +define void 
@log_f64(double* nocapture %varray) { +; CHECK-LABEL: @log_f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND1]] to <4 x double> +; CHECK-NEXT: [[TMP5:%.*]] = call fast <4 x double> @_ZGVdN4v___log_finite(<4 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call fast double @__log_finite(double [[CONV]]) [[ATTR3:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store double [[CALL]], double* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP9:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__log_finite(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %call, double* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: ; preds = %for.body + ret void +} + +!31 = distinct !{!31, !32, !33} +!32 = !{!"llvm.loop.vectorize.width", i32 4} +!33 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare float @__powf_finite(float, 
float) #0 + +define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { +; CHECK-LABEL: @pow_f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VARRAY1:%.*]] = bitcast float* [[VARRAY:%.*]] to i8* +; CHECK-NEXT: [[EXP3:%.*]] = bitcast float* [[EXP:%.*]] to i8* +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[VARRAY]], i64 1000 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8* +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[EXP]], i64 1000 +; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8* +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[VARRAY1]], [[SCEVGEP45]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[EXP3]], [[SCEVGEP2]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true +; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND6:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND6]] to <4 x float> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[EXP]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4, !alias.scope !10 +; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @_ZGVbN4vv___powf_finite(<4 x float> [[TMP4]], <4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP11]], align 4, !alias.scope !13, !noalias !10 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND6]], +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP15:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[EXP]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @__powf_finite(float [[CONV]], float [[TMP1]]) [[ATTR4:#.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP16:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %tmp2 = tail call fast float @__powf_finite(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %tmp2, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + +for.end: ; preds = %for.body + ret void +} + +!41 = distinct !{!41, !42, !43} +!42 = !{!"llvm.loop.vectorize.width", i32 4} +!43 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__pow_finite(double, double) #0 + +define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { +; CHECK-LABEL: @pow_f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VARRAY1:%.*]] = bitcast double* [[VARRAY:%.*]] to i8* +; CHECK-NEXT: [[EXP3:%.*]] = bitcast double* [[EXP:%.*]] to i8* +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[VARRAY]], i64 1000 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8* +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr double, double* [[EXP]], i64 1000 +; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast double* [[SCEVGEP4]] to i8* +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[VARRAY1]], [[SCEVGEP45]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[EXP3]], [[SCEVGEP2]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true +; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND6:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND6]] to <4 x double> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[EXP]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
double, double* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 4, !alias.scope !17 +; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x double> @_ZGVdN4vv___pow_finite(<4 x double> [[TMP4]], <4 x double> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[VARRAY]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP11]], align 4, !alias.scope !20, !noalias !17 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND6]], +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP22:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[EXP]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast double @__pow_finite(double [[CONV]], double [[TMP1]]) [[ATTR5:#.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store double [[TMP2]], double* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP23:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %arrayidx = getelementptr inbounds double, double* %exp, i64 %indvars.iv + %tmp1 = load double, double* %arrayidx, align 4 + %tmp2 = tail call fast double @__pow_finite(double %conv, double %tmp1) + %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %tmp2, double* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + +for.end: ; preds = %for.body + ret void +} + +!51 = distinct !{!51, !52, !53} +!52 = !{!"llvm.loop.vectorize.width", i32 4} +!53 = !{!"llvm.loop.vectorize.enable", i1 true} + Index: llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll @@ -0,0 +1,1010 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare double @sin(double) #0 +declare float @sinf(float) #0 +declare double @llvm.sin.f64(double) #0 +declare float @llvm.sin.f32(float) #0 + +declare double @cos(double) #0 +declare float @cosf(float) #0 +declare double @llvm.cos.f64(double) #0 +declare float @llvm.cos.f32(float) #0 + +define void @sin_f64(double* nocapture %varray) { +; CHECK-LABEL: @sin_f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND1]] to <4 x double> +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call double @sin(double [[CONV]]) [[ATTR2:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store double [[CALL]], double* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* 
%arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: + ret void +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} + + +define void @sin_f32(float* nocapture %varray) { +; CHECK-LABEL: @sin_f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[VEC_IND2]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = sitofp <4 x i32> [[STEP_ADD3]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[STEP_ADD3]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call float @sinf(float [[CONV]]) [[ATTR3:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store float [[CALL]], float* [[ARRAYIDX]], align 4 
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @sinf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: + ret void +} + +!21 = distinct !{!21, !22, !23} +!22 = !{!"llvm.loop.vectorize.width", i32 4} +!23 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @sin_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @sin_f64_intrinsic( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND1]] to <4 x double> +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV]]) [[ATTR4:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store double [[CALL]], double* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], 
[[LOOP7:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.sin.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: + ret void +} + +!31 = distinct !{!31, !32, !33} +!32 = !{!"llvm.loop.vectorize.width", i32 4} +!33 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @sin_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @sin_f32_intrinsic( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[VEC_IND2]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = sitofp <4 x i32> [[STEP_ADD3]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[STEP_ADD3]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[CONV]]) [[ATTR5:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store float [[CALL]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP9:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.sin.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + +for.end: + ret void +} + +!41 = distinct !{!41, !42, !43} +!42 = !{!"llvm.loop.vectorize.width", i32 4} +!43 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f64(double* nocapture %varray) { +; CHECK-LABEL: @cos_f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND1]] to <4 x double> +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call 
double @cos(double [[CONV]]) [[ATTR6:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store double [[CALL]], double* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP11:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @cos(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + +for.end: + ret void +} + +!51 = distinct !{!51, !52, !53} +!52 = !{!"llvm.loop.vectorize.width", i32 4} +!53 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f32(float* nocapture %varray) { +; CHECK-LABEL: @cos_f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[VEC_IND2]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = sitofp <4 x i32> [[STEP_ADD3]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[STEP_ADD3]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] +; 
CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call float @cosf(float [[CONV]]) [[ATTR7:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store float [[CALL]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP13:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @cosf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61 + +for.end: + ret void +} + +!61 = distinct !{!61, !62, !63} +!62 = !{!"llvm.loop.vectorize.width", i32 4} +!63 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @cos_f64_intrinsic( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND1]] to <4 x double> +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 
1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV]]) [[ATTR8:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store double [[CALL]], double* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP15:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.cos.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71 + +for.end: + ret void +} + +!71 = distinct !{!71, !72, !73} +!72 = !{!"llvm.loop.vectorize.width", i32 4} +!73 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @cos_f32_intrinsic( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[VEC_IND2]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = sitofp <4 x i32> [[STEP_ADD3]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <4 x float>* +; CHECK-NEXT: store <4 x 
float> [[TMP11]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[STEP_ADD3]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP16:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[CONV]]) [[ATTR9:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store float [[CALL]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP17:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.cos.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81 + +for.end: + ret void +} + +!81 = distinct !{!81, !82, !83} +!82 = !{!"llvm.loop.vectorize.width", i32 4} +!83 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @expf(float) #0 + +define void @exp_f32(float* nocapture %varray) { +; CHECK-LABEL: @exp_f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[VEC_IND2]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = sitofp <4 x i32> [[STEP_ADD3]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call fast <4 x float> @_ZGVbN4v_expf(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = call fast <4 x float> 
@_ZGVbN4v_expf(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[STEP_ADD3]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @expf(float [[CONV]]) [[ATTR10:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[CALL]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP19:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @expf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91 + +for.end: ; preds = %for.body + ret void +} + +!91 = distinct !{!91, !92, !93} +!92 = !{!"llvm.loop.vectorize.width", i32 4} +!93 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @llvm.exp.f32(float) #0 + +define void @exp_f32_intrin(float* nocapture %varray) { +; CHECK-LABEL: @exp_f32_intrin( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[VEC_IND2]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = sitofp <4 x i32> [[STEP_ADD3]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call fast <4 x float> @_ZGVbN4v_expf(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = call fast <4 x float> @_ZGVbN4v_expf(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[STEP_ADD3]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP20:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @llvm.exp.f32(float [[CONV]]) [[ATTR11:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[CALL]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP21:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @llvm.exp.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101 + 
+for.end: ; preds = %for.body + ret void +} + +!101 = distinct !{!101, !102, !103} +!102 = !{!"llvm.loop.vectorize.width", i32 4} +!103 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @logf(float) #0 + +define void @log_f32(float* nocapture %varray) { +; CHECK-LABEL: @log_f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[VEC_IND2]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = sitofp <4 x i32> [[STEP_ADD3]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call fast <4 x float> @_ZGVbN4v_logf(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = call fast <4 x float> @_ZGVbN4v_logf(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[VARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[STEP_ADD3]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP22:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @logf(float [[CONV]]) [[ATTR12:#.*]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[CALL]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 
[[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP23:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @logf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !111 + +for.end: ; preds = %for.body + ret void +} + +!111 = distinct !{!111, !112, !113} +!112 = !{!"llvm.loop.vectorize.width", i32 4} +!113 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @powf(float, float) #0 + +define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { +; CHECK-LABEL: @pow_f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VARRAY1:%.*]] = bitcast float* [[VARRAY:%.*]] to i8* +; CHECK-NEXT: [[EXP3:%.*]] = bitcast float* [[EXP:%.*]] to i8* +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[VARRAY]], i64 1000 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8* +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[EXP]], i64 1000 +; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8* +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[VARRAY1]], [[SCEVGEP45]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[EXP3]], [[SCEVGEP2]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true +; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND6:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND6]] to <4 x float> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[EXP]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4, !alias.scope !24 +; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @_ZGVbN4vv_powf(<4 x float> [[TMP4]], <4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP11]], align 4, !alias.scope 
!27, !noalias !24 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND6]], +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP29:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[EXP]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @powf(float [[CONV]], float [[TMP1]]) [[ATTR13:#.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP30:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %tmp2 = tail call fast float @powf(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %tmp2, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !121 + +for.end: ; preds = %for.body + ret void +} + +!121 = distinct !{!121, !122, !123} +!122 = !{!"llvm.loop.vectorize.width", i32 4} +!123 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @llvm.pow.f32(float, float) #0 + +define void @pow_f32_intrin(float* nocapture %varray, float* nocapture readonly %exp) { +; CHECK-LABEL: @pow_f32_intrin( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VARRAY1:%.*]] = bitcast float* [[VARRAY:%.*]] to i8* +; CHECK-NEXT: [[EXP3:%.*]] = bitcast float* [[EXP:%.*]] to i8* +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[VARRAY]], i64 1000 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8* +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[EXP]], i64 1000 +; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8* +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[VARRAY1]], [[SCEVGEP45]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[EXP3]], [[SCEVGEP2]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: 
[[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true +; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND6:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[VEC_IND6]] to <4 x float> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[EXP]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4, !alias.scope !31 +; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @_ZGVbN4vv_powf(<4 x float> [[TMP4]], <4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP11]], align 4, !alias.scope !34, !noalias !31 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND6]], +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP36:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[EXP]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.pow.f32(float [[CONV]], float [[TMP1]]) [[ATTR14:#.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP37:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp 
to float + %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %tmp2 = tail call fast float @llvm.pow.f32(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %tmp2, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !131 + +for.end: ; preds = %for.body + ret void +} + +!131 = distinct !{!131, !132, !133} +!132 = !{!"llvm.loop.vectorize.width", i32 4} +!133 = !{!"llvm.loop.vectorize.enable", i1 true} + +attributes #0 = { nounwind readnone } Index: llvm/test/Transforms/Util/add-TLI-mappings.ll =================================================================== --- llvm/test/Transforms/Util/add-TLI-mappings.ll +++ llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -3,6 +3,8 @@ ; RUN: opt -vector-library=MASSV -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV ; RUN: opt -vector-library=MASSV -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV ; RUN: opt -vector-library=Accelerate -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86 +; RUN: opt -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86 ; RUN: opt -vector-library=Accelerate -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -21,6 +23,9 @@ ; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4_massv to i8*) ; ACCELERATE-SAME: [1 x i8*] [ ; ACCELERATE-SAME: i8* bitcast (<4 x float> (<4 x float>)* @vlog10f to i8*) +; LIBMVEC-X86-SAME: [2 x i8*] [ +; LIBMVEC-X86-SAME: i8* bitcast (<2 x double> (<2 x double>)* @_ZGVbN2v_sin to i8*), +; LIBMVEC-X86-SAME: i8* bitcast (<4 x double> (<4 x double>)* @_ZGVdN4v_sin to i8*) ; COMMON-SAME: ], section "llvm.metadata" define double @sin_f64(double %in) { @@ -28,6 +33,7 @@ ; SVML: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; MASSV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; ACCELERATE: call double @sin(double %{{.*}}) +; LIBMVEC-X86: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; No mapping of "sin" to a vector function for Accelerate. ; ACCELERATE-NOT: _ZGV_LLVM_{{.*}}_sin({{.*}}) %call = tail call double @sin(double %in) @@ -39,10 +45,12 @@ define float @call_llvm.log10.f32(float %in) { ; COMMON-LABEL: @call_llvm.log10.f32( ; SVML: call float @llvm.log10.f32(float %{{.*}}) +; LIBMVEC-X86: call float @llvm.log10.f32(float %{{.*}}) ; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; ACCELERATE: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; No mapping of "llvm.log10.f32" to a vector function for SVML. ; SVML-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) +; LIBMVEC-X86-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) %call = tail call float @llvm.log10.f32(float %in) ret float %call } @@ -62,3 +70,7 @@ ; ACCELERATE: attributes #[[LOG10]] = { "vector-function-abi-variant"= ; ACCELERATE-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(vlog10f)" } + +; LIBMVEC-X86: attributes #[[SIN]] = { "vector-function-abi-variant"= +; LIBMVEC-X86-SAME: "_ZGV_LLVM_N2v_sin(_ZGVbN2v_sin), +; LIBMVEC-X86-SAME: _ZGV_LLVM_N4v_sin(_ZGVdN4v_sin)" }
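A quick way to exercise the new mapping end to end (illustrative sketch only, not part of the patch; the file name, optimization flags, target flags, and link line below are assumptions -- only -fveclib=libmvec and the _ZGV* symbol names come from this change):

    /* sin_loop.c (hypothetical example) */
    #include <math.h>
    void sin_loop(double *restrict dst, const double *restrict src, int n) {
      for (int i = 0; i < n; ++i)
        dst[i] = sin(src[i]);   /* candidate for the _ZGVbN2v_sin / _ZGVdN4v_sin mappings */
    }

    clang -O2 -ffast-math -mavx2 -fveclib=libmvec -S -emit-llvm sin_loop.c

With the entries added to VecFuncs.def, the loop vectorizer can replace the scalar call with something like

    call <4 x double> @_ZGVdN4v_sin(<4 x double> %wide.load)

and the resulting object then needs the GLIBC vector math library at link time (roughly: glibc 2.22 or newer on x86_64, linked with -lmvec -lm).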