Index: llvm/include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -44,6 +44,8 @@ static unsigned VectorizationInterleave; /// True if force-vector-interleave was specified by the user. static bool isInterleaveForced(); + /// True if force-vector-width was specified by the user. + static bool isVFForced(); /// \When performing memory disambiguation checks at runtime do not /// make more than this number of comparisons. Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -133,6 +133,10 @@ return ::VectorizationInterleave.getNumOccurrences() > 0; } +bool VectorizerParams::isVFForced() { + return ::VectorizationFactor.getNumOccurrences() > 0; +} + Value *llvm::stripIntegerCast(Value *V) { if (auto *CI = dyn_cast<CastInst>(V)) if (CI->getOperand(0)->getType()->isIntegerTy()) Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5234,9 +5234,20 @@ return None; } + auto MaxVF = computeFeasibleMaxVF(TC); + if (UserVF) { + if (!VectorizerParams::isVFForced() && UserVF > MaxVF) + LLVM_DEBUG( + dbgs() << "LV: User VF=" << UserVF + << " is unsafe, using maximum safe VF=" << MaxVF + << ". This can be overridden with '-force-vector-width=X'.\n"); + else + MaxVF = UserVF; + } + switch (ScalarEpilogueStatus) { case CM_ScalarEpilogueAllowed: - return UserVF ? 
UserVF : computeFeasibleMaxVF(TC); + return MaxVF; case CM_ScalarEpilogueNotNeededUsePredicate: LLVM_DEBUG( dbgs() << "LV: vector predicate hint/switch found.\n" @@ -5272,8 +5283,7 @@ InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); } - unsigned MaxVF = UserVF ? UserVF : computeFeasibleMaxVF(TC); - assert((UserVF || isPowerOf2_32(MaxVF)) && "MaxVF must be a power of 2"); + assert(isPowerOf2_32(MaxVF) && "MaxVF must be a power of 2"); unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF; if (TC > 0 && TC % MaxVFtimesIC == 0) { // Accept MaxVF if we do not have a tail. @@ -7000,7 +7010,11 @@ CM.invalidateCostModelingDecisions(); } - if (!UserVF.isZero()) { + unsigned MaxVF = MaybeMaxVF.getValue(); + assert(MaxVF != 0 && "MaxVF is zero."); + + if (!UserVF.isZero() && + (UserVF.getKnownMinValue() <= MaxVF || VectorizerParams::isVFForced())) { LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); assert(isPowerOf2_32(UserVF.getKnownMinValue()) && "VF needs to be a power of two"); @@ -7014,9 +7028,6 @@ return {{UserVF, 0}}; } - unsigned MaxVF = MaybeMaxVF.getValue(); - assert(MaxVF != 0 && "MaxVF is zero."); - for (unsigned VF = 1; VF <= MaxVF; VF *= 2) { // Collect Uniform and Scalar instructions after vectorization with VF. 
CM.collectUniformsAndScalars(ElementCount::getFixed(VF)); Index: llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll +++ llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll @@ -1,12 +1,13 @@ -; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s --check-prefix=CHECK-VF2 +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -force-vector-width=8 -S < %s | FileCheck %s --check-prefix=CHECK-VF8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @sin_f64(double* nocapture %varray) { -; CHECK-LABEL: @sin_f64( -; CHECK-LABEL: vector.body -; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_sin(<2 x double> [[TMP4:%.*]]) +; CHECK-VF2-LABEL: @sin_f64( +; CHECK-VF2-LABEL: vector.body +; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_sin(<2 x double> [[TMP4:%.*]]) ; entry: br label %for.body @@ -32,9 +33,9 @@ define void @sin_f32(float* nocapture %varray) { -; CHECK-LABEL: @sin_f32( -; CHECK-LABEL: vector.body -; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_sinf(<8 x float> [[TMP4:%.*]]) +; CHECK-VF8-LABEL: @sin_f32( +; CHECK-VF8-LABEL: vector.body +; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_sinf(<8 x float> [[TMP4:%.*]]) ; entry: br label %for.body @@ -48,16 +49,12 @@ store float %call, float* %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + br i1 %exitcond, label %for.end, label %for.body for.end: ret void } -!21 = distinct !{!21, !22, !23} -!22 = !{!"llvm.loop.vectorize.width", i32 8} -!23 = !{!"llvm.loop.vectorize.enable", i1 true} - define 
void @sin_f64_intrinsic(double* nocapture %varray) { ; CHECK-LABEL: @sin_f64_intrinsic( ; CHECK-LABEL: vector.body @@ -86,9 +83,9 @@ !33 = !{!"llvm.loop.vectorize.enable", i1 true} define void @sin_f32_intrinsic(float* nocapture %varray) { -; CHECK-LABEL: @sin_f32_intrinsic( -; CHECK-LABEL: vector.body -; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_sinf(<8 x float> [[TMP4:%.*]]) +; CHECK-VF8-LABEL: @sin_f32_intrinsic( +; CHECK-VF8-LABEL: vector.body +; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_sinf(<8 x float> [[TMP4:%.*]]) ; entry: br label %for.body @@ -102,16 +99,12 @@ store float %call, float* %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + br i1 %exitcond, label %for.end, label %for.body for.end: ret void } -!41 = distinct !{!41, !42, !43} -!42 = !{!"llvm.loop.vectorize.width", i32 8} -!43 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @cos_f64(double* nocapture %varray) { ; CHECK-LABEL: @cos_f64( ; CHECK-LABEL: vector.body @@ -140,9 +133,9 @@ !53 = !{!"llvm.loop.vectorize.enable", i1 true} define void @cos_f32(float* nocapture %varray) { -; CHECK-LABEL: @cos_f32( -; CHECK-LABEL: vector.body -; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_cosf(<8 x float> [[TMP4:%.*]]) +; CHECK-VF8-LABEL: @cos_f32( +; CHECK-VF8-LABEL: vector.body +; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_cosf(<8 x float> [[TMP4:%.*]]) ; entry: br label %for.body @@ -156,16 +149,12 @@ store float %call, float* %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61 + br i1 %exitcond, label %for.end, label %for.body for.end: ret void } -!61 = distinct !{!61, !62, !63} -!62 = !{!"llvm.loop.vectorize.width", i32 8} -!63 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @cos_f64_intrinsic(double* nocapture %varray) { ; CHECK-LABEL: 
@cos_f64_intrinsic( ; CHECK-LABEL: vector.body @@ -194,9 +183,9 @@ !73 = !{!"llvm.loop.vectorize.enable", i1 true} define void @cos_f32_intrinsic(float* nocapture %varray) { -; CHECK-LABEL: @cos_f32_intrinsic( -; CHECK-LABEL: vector.body -; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_cosf(<8 x float> [[TMP4:%.*]]) +; CHECK-VF8-LABEL: @cos_f32_intrinsic( +; CHECK-VF8-LABEL: vector.body +; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_cosf(<8 x float> [[TMP4:%.*]]) ; entry: br label %for.body @@ -210,21 +199,16 @@ store float %call, float* %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81 + br i1 %exitcond, label %for.end, label %for.body for.end: ret void } -!81 = distinct !{!81, !82, !83} -!82 = !{!"llvm.loop.vectorize.width", i32 8} -!83 = !{!"llvm.loop.vectorize.enable", i1 true} - - define void @exp_f32(float* nocapture %varray) { -; CHECK-LABEL: @exp_f32 -; CHECK-LABEL: vector.body -; CHECK: <8 x float> @_ZGVdN8v_expf +; CHECK-VF8-LABEL: @exp_f32 +; CHECK-VF8-LABEL: vector.body +; CHECK-VF8: <8 x float> @_ZGVdN8v_expf entry: br label %for.body @@ -237,20 +221,16 @@ store float %call, float* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!91 = distinct !{!91, !92, !93} -!92 = !{!"llvm.loop.vectorize.width", i32 8} -!93 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @exp_f32_intrin(float* nocapture %varray) { -; CHECK-LABEL: @exp_f32_intrin -; CHECK-LABEL: vector.body -; CHECK: <8 x float> @_ZGVdN8v_expf +; CHECK-VF8-LABEL: @exp_f32_intrin +; CHECK-VF8-LABEL: vector.body +; CHECK-VF8: <8 x float> @_ZGVdN8v_expf entry: br label %for.body @@ -263,21 +243,16 @@ store float %call, float* %arrayidx, align 4 
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!101 = distinct !{!101, !102, !103} -!102 = !{!"llvm.loop.vectorize.width", i32 8} -!103 = !{!"llvm.loop.vectorize.enable", i1 true} - - define void @log_f32(float* nocapture %varray) { -; CHECK-LABEL: @log_f32 -; CHECK-LABEL: vector.body -; CHECK: <8 x float> @_ZGVdN8v_logf +; CHECK-VF8-LABEL: @log_f32 +; CHECK-VF8-LABEL: vector.body +; CHECK-VF8: <8 x float> @_ZGVdN8v_logf entry: br label %for.body @@ -290,20 +265,16 @@ store float %call, float* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !111 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!111 = distinct !{!111, !112, !113} -!112 = !{!"llvm.loop.vectorize.width", i32 8} -!113 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { -; CHECK-LABEL: @pow_f32 -; CHECK-LABEL: vector.body -; CHECK: <8 x float> @_ZGVdN8vv_powf +; CHECK-VF8-LABEL: @pow_f32 +; CHECK-VF8-LABEL: vector.body +; CHECK-VF8: <8 x float> @_ZGVdN8vv_powf entry: br label %for.body @@ -318,20 +289,16 @@ store float %tmp2, float* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !121 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!121 = distinct !{!121, !122, !123} -!122 = !{!"llvm.loop.vectorize.width", i32 8} -!123 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @pow_f32_intrin(float* nocapture %varray, float* nocapture readonly %exp) { -; CHECK-LABEL: 
@pow_f32_intrin -; CHECK-LABEL: vector.body -; CHECK: <8 x float> @_ZGVdN8vv_powf +; CHECK-VF8-LABEL: @pow_f32_intrin +; CHECK-VF8-LABEL: vector.body +; CHECK-VF8: <8 x float> @_ZGVdN8vv_powf entry: br label %for.body @@ -346,16 +313,12 @@ store float %tmp2, float* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !131 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!131 = distinct !{!131, !132, !133} -!132 = !{!"llvm.loop.vectorize.width", i32 8} -!133 = !{!"llvm.loop.vectorize.enable", i1 true} - attributes #0 = { nounwind readnone } declare double @sin(double) #0 Index: llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll +++ llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll @@ -1,4 +1,5 @@ ; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-FORCE-VF4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -30,10 +31,10 @@ !3 = !{!"llvm.loop.vectorize.enable", i1 true} define void @exp_f64(double* nocapture %varray) { -; CHECK-LABEL: @exp_f64 -; CHECK-LABEL: vector.body -; CHECK: <4 x double> @_ZGVdN4v___exp_finite -; CHECK: ret +; CHECK-FORCE-VF4-LABEL: @exp_f64 +; CHECK-FORCE-VF4-LABEL: vector.body +; CHECK-FORCE-VF4: <4 x double> @_ZGVdN4v___exp_finite +; CHECK-FORCE-VF4: ret entry: br label %for.body @@ -46,16 +47,12 @@ store double %call, double* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label 
%for.end, label %for.body, !llvm.loop !11 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!11 = distinct !{!11, !12, !13} -!12 = !{!"llvm.loop.vectorize.width", i32 4} -!13 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @log_f32(float* nocapture %varray) { ; CHECK-LABEL: @log_f32 ; CHECK-LABEL: vector.body @@ -84,10 +81,10 @@ !23 = !{!"llvm.loop.vectorize.enable", i1 true} define void @log_f64(double* nocapture %varray) { -; CHECK-LABEL: @log_f64 -; CHECK-LABEL: vector.body -; CHECK: <4 x double> @_ZGVdN4v___log_finite -; CHECK: ret +; CHECK-FORCE-VF4-LABEL: @log_f64 +; CHECK-FORCE-VF4-LABEL: vector.body +; CHECK-FORCE-VF4: <4 x double> @_ZGVdN4v___log_finite +; CHECK-FORCE-VF4: ret entry: br label %for.body @@ -100,16 +97,12 @@ store double %call, double* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!31 = distinct !{!31, !32, !33} -!32 = !{!"llvm.loop.vectorize.width", i32 4} -!33 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { ; CHECK-LABEL: @pow_f32 ; CHECK-LABEL: vector.body @@ -140,10 +133,10 @@ !43 = !{!"llvm.loop.vectorize.enable", i1 true} define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { -; CHECK-LABEL: @pow_f64 -; CHECK-LABEL: vector.body -; CHECK: <4 x double> @_ZGVdN4vv___pow_finite -; CHECK: ret +; CHECK-FORCE-VF4-LABEL: @pow_f64 +; CHECK-FORCE-VF4-LABEL: vector.body +; CHECK-FORCE-VF4: <4 x double> @_ZGVdN4vv___pow_finite +; CHECK-FORCE-VF4: ret entry: br label %for.body @@ -158,16 +151,12 @@ store double %tmp2, double* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label 
%for.end, label %for.body, !llvm.loop !51 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!51 = distinct !{!51, !52, !53} -!52 = !{!"llvm.loop.vectorize.width", i32 4} -!53 = !{!"llvm.loop.vectorize.enable", i1 true} - declare float @__expf_finite(float) #0 declare double @__exp_finite(double) #0 declare float @__logf_finite(float) #0 Index: llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll +++ llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll @@ -1,12 +1,13 @@ ; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-FORCE-VF4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @sin_f64(double* nocapture %varray) { -; CHECK-LABEL: @sin_f64( -; CHECK-LABEL: vector.body -; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]]) +; CHECK-FORCE-VF4-LABEL: @sin_f64( +; CHECK-FORCE-VF4-LABEL: vector.body +; CHECK-FORCE-VF4: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]]) ; entry: br label %for.body @@ -20,17 +21,12 @@ store double %call, double* %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + br i1 %exitcond, label %for.end, label %for.body for.end: ret void } -!1 = distinct !{!1, !2, !3} -!2 = !{!"llvm.loop.vectorize.width", i32 4} -!3 = !{!"llvm.loop.vectorize.enable", i1 true} - - define void @sin_f32(float* nocapture %varray) { ; CHECK-LABEL: @sin_f32( ; CHECK-LABEL: vector.body @@ -59,9 +55,9 @@ !23 = !{!"llvm.loop.vectorize.enable", i1 true} define void 
@sin_f64_intrinsic(double* nocapture %varray) { -; CHECK-LABEL: @sin_f64_intrinsic( -; CHECK-LABEL: vector.body -; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]]) +; CHECK-FORCE-VF4-LABEL: @sin_f64_intrinsic( +; CHECK-FORCE-VF4-LABEL: vector.body +; CHECK-FORCE-VF4: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]]) ; entry: br label %for.body @@ -75,16 +71,11 @@ store double %call, double* %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + br i1 %exitcond, label %for.end, label %for.body for.end: ret void } - -!31 = distinct !{!31, !32, !33} -!32 = !{!"llvm.loop.vectorize.width", i32 4} -!33 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @sin_f32_intrinsic(float* nocapture %varray) { ; CHECK-LABEL: @sin_f32_intrinsic( ; CHECK-LABEL: vector.body @@ -113,9 +104,9 @@ !43 = !{!"llvm.loop.vectorize.enable", i1 true} define void @cos_f64(double* nocapture %varray) { -; CHECK-LABEL: @cos_f64( -; CHECK-LABEL: vector.body -; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]]) +; CHECK-FORCE-VF4-LABEL: @cos_f64( +; CHECK-FORCE-VF4-LABEL: vector.body +; CHECK-FORCE-VF4: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]]) ; entry: br label %for.body @@ -129,16 +120,12 @@ store double %call, double* %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + br i1 %exitcond, label %for.end, label %for.body for.end: ret void } -!51 = distinct !{!51, !52, !53} -!52 = !{!"llvm.loop.vectorize.width", i32 4} -!53 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @cos_f32(float* nocapture %varray) { ; CHECK-LABEL: @cos_f32( ; CHECK-LABEL: vector.body @@ -167,9 +154,9 @@ !63 = !{!"llvm.loop.vectorize.enable", i1 true} define void 
@cos_f64_intrinsic(double* nocapture %varray) { -; CHECK-LABEL: @cos_f64_intrinsic( -; CHECK-LABEL: vector.body -; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]]) +; CHECK-FORCE-VF4-LABEL: @cos_f64_intrinsic( +; CHECK-FORCE-VF4-LABEL: vector.body +; CHECK-FORCE-VF4: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]]) ; entry: br label %for.body @@ -183,16 +170,12 @@ store double %call, double* %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71 + br i1 %exitcond, label %for.end, label %for.body for.end: ret void } -!71 = distinct !{!71, !72, !73} -!72 = !{!"llvm.loop.vectorize.width", i32 4} -!73 = !{!"llvm.loop.vectorize.enable", i1 true} - define void @cos_f32_intrinsic(float* nocapture %varray) { ; CHECK-LABEL: @cos_f32_intrinsic( ; CHECK-LABEL: vector.body Index: llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll +++ llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll @@ -1,4 +1,5 @@ ; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s +; RUN: opt -vector-library=SVML -inject-tli-mappings -loop-vectorize -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-FORCE-VF4 ; Test to verify that when math headers are built with ; __FINITE_MATH_ONLY__ enabled, causing use of ___finite @@ -38,9 +39,9 @@ declare double @__exp_finite(double) #0 -; CHECK-LABEL: @exp_f64 -; CHECK: <4 x double> @__svml_exp4 -; CHECK: ret +; CHECK-FORCE-VF4-LABEL: @exp_f64 +; CHECK-FORCE-VF4: <4 x double> @__svml_exp4 +; CHECK-FORCE-VF4: ret define void @exp_f64(double* nocapture %varray) { entry: br label %for.body @@ -54,19 +55,12 @@ store double %call, double* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 
%exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!11 = distinct !{!11, !12, !13} -!12 = !{!"llvm.loop.vectorize.width", i32 4} -!13 = !{!"llvm.loop.vectorize.enable", i1 true} - - - - declare float @__logf_finite(float) #0 ; CHECK-LABEL: @log_f32 @@ -98,9 +92,9 @@ declare double @__log_finite(double) #0 -; CHECK-LABEL: @log_f64 -; CHECK: <4 x double> @__svml_log4 -; CHECK: ret +; CHECK-FORCE-VF4-LABEL: @log_f64 +; CHECK-FORCE-VF4: <4 x double> @__svml_log4 +; CHECK-FORCE-VF4: ret define void @log_f64(double* nocapture %varray) { entry: br label %for.body @@ -114,17 +108,12 @@ store double %call, double* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!31 = distinct !{!31, !32, !33} -!32 = !{!"llvm.loop.vectorize.width", i32 4} -!33 = !{!"llvm.loop.vectorize.enable", i1 true} - - declare float @__powf_finite(float, float) #0 ; CHECK-LABEL: @pow_f32 @@ -158,9 +147,9 @@ declare double @__pow_finite(double, double) #0 -; CHECK-LABEL: @pow_f64 -; CHECK: <4 x double> @__svml_pow4 -; CHECK: ret +; CHECK-FORCE-VF4-LABEL: @pow_f64 +; CHECK-FORCE-VF4: <4 x double> @__svml_pow4 +; CHECK-FORCE-VF4: ret define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { entry: br label %for.body @@ -176,16 +165,12 @@ store double %tmp2, double* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!51 = distinct !{!51, !52, !53} -!52 = !{!"llvm.loop.vectorize.width", 
i32 4} -!53 = !{!"llvm.loop.vectorize.enable", i1 true} - declare float @__exp2f_finite(float) #0 define void @exp2f_finite(float* nocapture %varray) { @@ -218,9 +203,9 @@ declare double @__exp2_finite(double) #0 define void @exp2_finite(double* nocapture %varray) { -; CHECK-LABEL: @exp2_finite( -; CHECK: call <4 x double> @__svml_exp24(<4 x double> {{.*}}) -; CHECK: ret void +; CHECK-FORCE-VF4-LABEL: @exp2_finite( +; CHECK-FORCE-VF4: call <4 x double> @__svml_exp24(<4 x double> {{.*}}) +; CHECK-FORCE-VF4: ret void ; entry: br label %for.body @@ -234,16 +219,12 @@ store double %call, double* %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71 + br i1 %exitcond, label %for.end, label %for.body for.end: ret void } -!71 = distinct !{!71, !72, !73} -!72 = !{!"llvm.loop.vectorize.width", i32 4} -!73 = !{!"llvm.loop.vectorize.enable", i1 true} - declare float @__log2f_finite(float) #0 ; CHECK-LABEL: @log2_f32 @@ -275,9 +256,9 @@ declare double @__log2_finite(double) #0 -; CHECK-LABEL: @log2_f64 -; CHECK: <4 x double> @__svml_log24 -; CHECK: ret +; CHECK-FORCE-VF4-LABEL: @log2_f64 +; CHECK-FORCE-VF4: <4 x double> @__svml_log24 +; CHECK-FORCE-VF4: ret define void @log2_f64(double* nocapture %varray) { entry: br label %for.body @@ -291,16 +272,12 @@ store double %call, double* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!91 = distinct !{!31, !32, !33} -!92 = !{!"llvm.loop.vectorize.width", i32 4} -!93 = !{!"llvm.loop.vectorize.enable", i1 true} - declare float @__log10f_finite(float) #0 ; CHECK-LABEL: @log10_f32 @@ -332,9 +309,9 @@ declare double @__log10_finite(double) #0 -; CHECK-LABEL: @log10_f64 -; CHECK: <4 x double> @__svml_log104 -; 
CHECK: ret +; CHECK-FORCE-VF4-LABEL: @log10_f64 +; CHECK-FORCE-VF4: <4 x double> @__svml_log104 +; CHECK-FORCE-VF4: ret define void @log10_f64(double* nocapture %varray) { entry: br label %for.body @@ -348,16 +325,12 @@ store double %call, double* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } -!111 = distinct !{!31, !32, !33} -!112 = !{!"llvm.loop.vectorize.width", i32 4} -!113 = !{!"llvm.loop.vectorize.enable", i1 true} - declare float @__sqrtf_finite(float) #0 ; CHECK-LABEL: @sqrt_f32 @@ -389,9 +362,9 @@ declare double @__sqrt_finite(double) #0 -; CHECK-LABEL: @sqrt_f64 -; CHECK: <4 x double> @__svml_sqrt4 -; CHECK: ret +; CHECK-FORCE-VF4-LABEL: @sqrt_f64 +; CHECK-FORCE-VF4: <4 x double> @__svml_sqrt4 +; CHECK-FORCE-VF4: ret define void @sqrt_f64(double* nocapture %varray) { entry: br label %for.body @@ -405,12 +378,8 @@ store double %call, double* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } - -!131 = distinct !{!31, !32, !33} -!132 = !{!"llvm.loop.vectorize.width", i32 4} -!133 = !{!"llvm.loop.vectorize.enable", i1 true} Index: llvm/test/Transforms/LoopVectorize/metadata-width.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/metadata-width.ll +++ llvm/test/Transforms/LoopVectorize/metadata-width.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -18,13 +18,10 @@ %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry ret void } attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!0 = !{!0, !1} -!1 = !{!"llvm.loop.vectorize.width", i32 8} Index: llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll +++ llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s -; RUN: opt < %s -debugify -loop-vectorize -S | FileCheck %s -check-prefix DEBUGLOC +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S 2>&1 | FileCheck %s +; RUN: opt < %s -debugify -loop-vectorize -force-vector-width=4 -S | FileCheck %s -check-prefix DEBUGLOC target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; This test makes sure we don't duplicate the loop vectorizer's metadata Index: llvm/test/Transforms/LoopVectorize/runtime-check.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -disable-basic-aa -S 
-pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s -check-prefix=FORCED_OPTSIZE +; RUN: opt < %s -loop-vectorize -force-vector-width=2 -disable-basic-aa -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s -check-prefix=FORCED_OPTSIZE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -183,7 +183,7 @@ store i64 %add, i64* %arrayidx4, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 128 - br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !12 + br i1 %exitcond, label %for.cond.cleanup, label %for.body } !llvm.module.flags = !{!0, !1} @@ -204,6 +204,3 @@ splitDebugFilename: "abc.debug", emissionKind: 2) !10 = !DIFile(filename: "path/to/file", directory: "/path/to/dir") !11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = distinct !{!12, !13, !14} -!13 = !{!"llvm.loop.vectorize.width", i32 2} -!14 = !{!"llvm.loop.vectorize.enable", i1 true} Index: llvm/test/Transforms/LoopVectorize/unsafe-vf-remark.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/unsafe-vf-remark.ll @@ -0,0 +1,44 @@ +; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s + +; Make sure that we report unsafe user specified vectorization factor. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; void foo(int *a, int *b, int N) { +; #pragma clang loop vectorize(enable) vectorize_width(4) +; for (int i=0; i