diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -319,6 +319,11 @@
   getDecisionAndClampRange(const std::function &Predicate,
                            VFRange &Range);
 
+  /// Return true if the loop needs more runtime pointer checks than the
+  /// memory-check thresholds permit. Used to decide whether to skip selecting
+  /// an interleave count.
+  bool isTooManyRuntimeChecks() const;
+
 protected:
   /// Collect the instructions from the original loop that would be trivially
   /// dead in the vectorized loop if generated.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7591,6 +7591,20 @@
   return VectorizationFactor::Disabled();
 }
 
+bool LoopVectorizationPlanner::isTooManyRuntimeChecks() const {
+  unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
+  bool PragmaThresholdReached =
+      NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
+  bool ThresholdReached =
+      NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
+  // Exceeding the pragma threshold always counts as too many. Exceeding the
+  // default threshold counts only when the loop hints do not allow
+  // reordering. Requiring both thresholds to be exceeded would leave the
+  // lower of the two without any effect.
+  return PragmaThresholdReached ||
+         (ThresholdReached && !Hints.allowReordering());
+}
+
 Optional
 LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
@@ -10457,7 +10471,9 @@
   if (MaybeVF) {
     VF = *MaybeVF;
     // Select the interleave count.
-    IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
+    // Leave IC untouched when too many runtime checks would be required.
+    if (!LVP.isTooManyRuntimeChecks())
+      IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
   }
 
   // Identify the diagnostic messages that should be produced.
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-pointer-runtime-check-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/interleaved-pointer-runtime-check-unprofitable.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-pointer-runtime-check-unprofitable.ll
@@ -0,0 +1,118 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -S -loop-vectorize -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s
+
+; Check that the vectorizer reports interleaving as not beneficial for the loop
+; below, which stores through many distinct pointer arguments. The RUN line
+; depends on -debug-only output, so the test is limited to assertion builds.
+; NOTE(review): the RUN line also needs the PowerPC backend; confirm that the
+; lit configuration for this directory restricts the test accordingly.
+
+; CHECK-LABEL: LV: Checking a loop in "eddy_diff_caleddy_"
+; CHECK: LV: Interleaving is not beneficial.
+
+define fastcc void @eddy_diff_caleddy_(i64* %wet_cl, i64* %web_cl, i64* %jtbu_cl, i64* %jbbu_cl, i64* %n2ht_cl, i64* %n2hb_cl, i64* %lwp_cl, i64* %0, double* %1, i64 %2, i64* %3, double* %4, i64* %5, double* %6, i64* %7, double* %8, i64* %9, double* %10, i64* %11, double* %12, i64* %13, double* %14, i64* %15, double* %16, i64* %17, double* %18, i64* %19, double* %20, i64* %21, double* %22, i64* %23, double* %24, i64* %25, double* %26, i64* %27, double* %28) {
+L.LB7_2232.preheader:
+  br label %L.LB7_2232
+
+L.LB7_2232:                                       ; preds = %L.LB7_3015, %L.LB7_2232.preheader
+  br label %vector.ph
+
+vector.ph:                                        ; preds = %L.LB7_2232
+  br label %middle.block.unr-lcssa
+
+middle.block.unr-lcssa:                           ; preds = %vector.ph
+  br label %middle.block
+
+middle.block:                                     ; preds = %middle.block.unr-lcssa
+  br label %L.LB7_3015
+
+L.LB7_3015:                                       ; preds = %middle.block
+  br i1 false, label %L.LB7_2238.preheader165, label %L.LB7_2232
+
+L.LB7_2238.preheader165:                          ; preds = %L.LB7_3015
+  br label %L.LB7_2238
+
+L.LB7_2238:                                       ; preds = %L.LB7_2242.loopexit, %L.LB7_2238.preheader165
+  br label %L.LB7_2241.preheader
+
+L.LB7_2241.preheader:                             ; preds = %L.LB7_2238
+  br label %L.LB7_2241
+
+L.LB7_2241:                                       ; preds = %L.LB7_2241, %L.LB7_2241.preheader
+  br i1 false, label %L.LB7_2242.loopexit.loopexit, label %L.LB7_2241
+
+L.LB7_2242.loopexit.loopexit:                     ; preds = %L.LB7_2241
+  br label %L.LB7_2242.loopexit
+
+L.LB7_2242.loopexit:                              ; preds = %L.LB7_2242.loopexit.loopexit
+  br i1 false, label %L.LB7_2249.preheader, label %L.LB7_2238
+
+L.LB7_2249.preheader:                             ; preds = %L.LB7_2242.loopexit
+  %29 = mul i64 0, 0
+  br label %L.LB7_2249
+
+L.LB7_2249:                                       ; preds = %L.LB7_2249, %L.LB7_2249.preheader
+  %indvars.iv774 = phi i64 [ 0, %L.LB7_2249.preheader ], [ %indvars.iv.next775, %L.LB7_2249 ]
+  %30 = add nsw i64 0, 0
+  %31 = getelementptr i64, i64* %wet_cl, i64 undef
+  %32 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %1, align 8
+  %33 = add i64 0, 0
+  %34 = getelementptr i64, i64* %wet_cl, i64 %2
+  %35 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %4, align 8
+  %36 = add i64 %indvars.iv774, undef
+  %37 = getelementptr i64, i64* %wet_cl, i64 %36
+  %38 = bitcast i64* %37 to double*
+  store double 0.000000e+00, double* %38, align 8
+  %39 = getelementptr i64, i64* %wet_cl, i64 undef
+  %40 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %6, align 8
+  %41 = getelementptr i64, i64* %wet_cl, i64 undef
+  %42 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %8, align 8
+  %43 = getelementptr i64, i64* %wet_cl, i64 undef
+  %44 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %10, align 8
+  %45 = getelementptr i64, i64* %wet_cl, i64 undef
+  %46 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %12, align 8
+  %47 = getelementptr i64, i64* %wet_cl, i64 undef
+  %48 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %14, align 8
+  %49 = getelementptr i64, i64* %wet_cl, i64 undef
+  %50 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %16, align 8
+  %51 = getelementptr i64, i64* %wet_cl, i64 undef
+  %52 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %18, align 8
+  %53 = getelementptr i64, i64* %wet_cl, i64 undef
+  %54 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %20, align 8
+  %55 = add i64 %indvars.iv774, 0
+  %56 = getelementptr i64, i64* %wet_cl, i64 %55
+  %57 = bitcast i64* %56 to double*
+  store double 0.000000e+00, double* %57, align 8
+  %58 = getelementptr i64, i64* %wet_cl, i64 undef
+  %59 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %22, align 8
+  %60 = getelementptr i64, i64* %web_cl, i64 undef
+  %61 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %24, align 8
+  %62 = getelementptr i64, i64* %web_cl, i64 %2
+  %63 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %26, align 8
+  %64 = getelementptr i64, i64* %web_cl, i64 %36
+  %65 = bitcast i64* %64 to double*
+  store double 0.000000e+00, double* %65, align 8
+  %66 = getelementptr i64, i64* %web_cl, i64 undef
+  %67 = bitcast i64* %wet_cl to double*
+  store double 0.000000e+00, double* %28, align 8
+  %indvars.iv.next775 = add nuw nsw i64 %indvars.iv774, 1
+  %exitcond778.not = icmp eq i64 %indvars.iv.next775, 0
+  br i1 %exitcond778.not, label %L.LB7_2330.preheader, label %L.LB7_2249
+
+L.LB7_2330.preheader:                             ; preds = %L.LB7_2249
+  ret void
+}
+