[LV] Build a VF=1 VPlan when interleaving without vectorizing.

Summary of the changes in this patch:
  * LoopVectorizationPlanner.h: declare LoopVectorizationPlanner::planForInterleaving().
  * LoopVectorize.cpp: define planForInterleaving(). It returns immediately if
    VPlans is non-empty; otherwise it asserts that OrigLoop is an inner loop,
    calls CM.collectUniformsAndScalars(1) and buildVPlansWithVPRecipes(1, 1),
    and prints the plans under LLVM_DEBUG.
  * LoopVectorize.cpp: read the user interleave count (UserIC) before calling
    LVP.plan() instead of after selecting the interleave count.
  * LoopVectorize.cpp: call LVP.planForInterleaving() on the
    "!VectorizeLoop && InterleaveLoop" path, which may be reached without
    LVP.plan() having built any VPlans.
  * Add test llvm/test/Transforms/LoopVectorize/interleave-need-vplan.ll, a loop
    carrying llvm.loop.interleave.count = 2 metadata.

Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -224,6 +224,10 @@
   /// Plan how to best vectorize, return the best VF and its cost.
   VectorizationFactor plan(bool OptForSize, unsigned UserVF);
 
+  /// Build VPlan with VF = 1 for interleaving without vectorization, if no
+  /// VPlans have been created earlier.
+  void planForInterleaving();
+
   /// Use the VPlan-native path to plan how to best vectorize, return the best
   /// VF and its cost.
   VectorizationFactor planInVPlanNativePath(bool OptForSize, unsigned UserVF);
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6161,6 +6161,18 @@
   return CM.selectVectorizationFactor(MaxVF);
 }
 
+void LoopVectorizationPlanner::planForInterleaving() {
+  // Already created VPlans.
+  if (!VPlans.empty())
+    return;
+
+  assert(OrigLoop->empty() && "Inner loop expected.");
+  // Collect Uniform and Scalar instructions after vectorization.
+  CM.collectUniformsAndScalars(1);
+  buildVPlansWithVPRecipes(1, 1);
+  LLVM_DEBUG(printPlans(dbgs()));
+}
+
 void LoopVectorizationPlanner::setBestPlan(unsigned VF, unsigned UF) {
   LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF
                     << '\n');
@@ -7319,15 +7331,15 @@
   // Get user vectorization factor.
   unsigned UserVF = Hints.getWidth();
 
+  // Get user interleave count.
+  unsigned UserIC = Hints.getInterleave();
+
   // Plan how to best vectorize, return the best VF and its cost.
   VectorizationFactor VF = LVP.plan(OptForSize, UserVF);
 
   // Select the interleave count.
   unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
 
-  // Get user interleave count.
-  unsigned UserIC = Hints.getInterleave();
-
   // Identify the diagnostic messages that should be produced.
   std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg;
   bool VectorizeLoop = true, InterleaveLoop = true;
@@ -7388,6 +7400,9 @@
     });
     return false;
   } else if (!VectorizeLoop && InterleaveLoop) {
+    // We might reach this code path without building VPlans in LVP.plan().
+    // Make sure we have a plan with VF == 1, for interleaving.
+    LVP.planForInterleaving();
     LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
     ORE->emit([&]() {
       return OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first,
Index: llvm/test/Transforms/LoopVectorize/interleave-need-vplan.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/interleave-need-vplan.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @inc(i32 %n) #0 {
+; CHECK-LABEL: @inc(
+; CHECK-NEXT:    br i1 false, label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]]
+; CHECK:       .lr.ph.preheader:
+; CHECK-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1, 0
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
+; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
+; CHECK:       .lr.ph:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !3
+; CHECK:       ._crit_edge.loopexit:
+; CHECK-NEXT:    br label [[DOT_CRIT_EDGE]]
+; CHECK:       ._crit_edge:
+; CHECK-NEXT:    ret void
+;
+  br i1 false, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %.lr.ph, %0
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  br i1 true, label %._crit_edge, label %.lr.ph, !llvm.loop !0
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.interleave.count", i32 2}