diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8939,8 +8939,9 @@ } SmallPtrSet DeadInstructions; - VPlanTransforms::VPInstructionsToVPRecipes( - OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions); + VPlanTransforms::VPInstructionsToVPRecipes(OrigLoop, Plan, + Legal->getInductionVars(), + DeadInstructions, *PSE.getSE()); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -25,7 +25,7 @@ static void VPInstructionsToVPRecipes( Loop *OrigLoop, VPlanPtr &Plan, LoopVectorizationLegality::InductionList &Inductions, - SmallPtrSetImpl &DeadInstructions); + SmallPtrSetImpl &DeadInstructions, ScalarEvolution &SE); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -19,7 +19,7 @@ void VPlanTransforms::VPInstructionsToVPRecipes( Loop *OrigLoop, VPlanPtr &Plan, LoopVectorizationLegality::InductionList &Inductions, - SmallPtrSetImpl &DeadInstructions) { + SmallPtrSetImpl &DeadInstructions, ScalarEvolution &SE) { auto *TopRegion = cast(Plan->getEntry()); ReversePostOrderTraversal RPOT(TopRegion->getEntry()); @@ -74,6 +74,11 @@ } else if (CallInst *CI = dyn_cast(Inst)) { NewRecipe = new VPWidenCallRecipe( *CI, Plan->mapToVPValues(CI->arg_operands())); + } else if (SelectInst *SI = dyn_cast(Inst)) { + bool InvariantCond = + SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop); + NewRecipe = new VPWidenSelectRecipe( + *SI, Plan->mapToVPValues(SI->operands()), InvariantCond); } else { NewRecipe = new 
VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands())); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll @@ -0,0 +1,200 @@ +; RUN: opt -loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s + +; Test that VPlan native path is able to widen select instruction in the +; innermost loop under different conditions when outer loop is marked to be +; vectorized. These conditions include the following: +; * Inner and outer loop invariant select condition +; * Select condition depending on outer loop iteration variable. +; * Select condition depending on inner loop iteration variable. +; * Select condition depending on both outer and inner loop iteration +; variables. + +define void @loop_invariant_select(double* noalias nocapture %out, i1 %select, double %a, double %b) { +; CHECK-LABEL: @loop_invariant_select( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ] +; CHECK-NEXT: [[TMP0:%.*]] = 
getelementptr inbounds double, double* [[OUT:%.*]], <4 x i64> [[VEC_IND]] +; CHECK-NEXT: br label [[FOR2_HEADER1:%.*]] +; CHECK: for2.header1: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP2:%.*]], [[FOR2_HEADER1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[SELECT:%.*]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP1]], <4 x double*> [[TMP0]], i32 8, <4 x i1> ) +entry: + br label %for1.header + +for1.header: + %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ] + %ptr = getelementptr inbounds double, double* %out, i64 %indvar1 + br label %for2.header + +for2.header: + %indvar2 = phi i64 [ 0, %for1.header ], [ %indvar21, %for2.header ] + ; Select condition is loop invariant for both inner and outer loop. + %select.b = select i1 %select, double %a, double %b + store double %select.b, double* %ptr, align 8 + %indvar21 = add nuw nsw i64 %indvar2, 1 + %for2.cond = icmp eq i64 %indvar21, 10000 + br i1 %for2.cond, label %for1.latch, label %for2.header + +for1.latch: + %indvar11 = add nuw nsw i64 %indvar1, 1 + %for1.cond = icmp eq i64 %indvar11, 1000 + br i1 %for1.cond, label %exit, label %for1.header, !llvm.loop !0 + +exit: + ret void +} + +define void @outer_loop_dependant_select(double* noalias nocapture %out, double %a, double %b) { +; CHECK-LABEL: @outer_loop_dependant_select( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> 
[[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], <4 x i64> [[VEC_IND]] +; CHECK-NEXT: br label [[FOR2_HEADER1:%.*]] +; CHECK: for2.header1: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP2]], <4 x double*> [[TMP0]], i32 8, <4 x i1> ) +entry: + br label %for1.header + +for1.header: + %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ] + %ptr = getelementptr inbounds double, double* %out, i64 %indvar1 + br label %for2.header + +for2.header: + %indvar2 = phi i64 [ 0, %for1.header ], [ %indvar21, %for2.header ] + %select = trunc i64 %indvar1 to i1 + ; Select condition only depends on outer loop iteration variable. 
+ %select.b = select i1 %select, double %a, double %b + store double %select.b, double* %ptr, align 8 + %indvar21 = add nuw nsw i64 %indvar2, 1 + %for2.cond = icmp eq i64 %indvar21, 10000 + br i1 %for2.cond, label %for1.latch, label %for2.header + +for1.latch: + %indvar11 = add nuw nsw i64 %indvar1, 1 + %for1.cond = icmp eq i64 %indvar11, 1000 + br i1 %for1.cond, label %exit, label %for1.header, !llvm.loop !0 + +exit: + ret void +} + +define void @inner_loop_dependant_select(double* noalias nocapture %out, double %a, double %b) { +; CHECK-LABEL: @inner_loop_dependant_select( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], <4 x i64> [[VEC_IND]] +; CHECK-NEXT: br label [[FOR2_HEADER1:%.*]] +; CHECK: for2.header1: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_PHI]] to <4 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] +; CHECK-NEXT: call void 
@llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP2]], <4 x double*> [[TMP0]], i32 8, <4 x i1> ) +entry: + br label %for1.header + +for1.header: + %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ] + %ptr = getelementptr inbounds double, double* %out, i64 %indvar1 + br label %for2.header + +for2.header: + %indvar2 = phi i64 [ 0, %for1.header ], [ %indvar21, %for2.header ] + %select = trunc i64 %indvar2 to i1 + ; Select condition only depends on inner loop iteration variable. + %select.b = select i1 %select, double %a, double %b + store double %select.b, double* %ptr, align 8 + %indvar21 = add nuw nsw i64 %indvar2, 1 + %for2.cond = icmp eq i64 %indvar21, 10000 + br i1 %for2.cond, label %for1.latch, label %for2.header + +for1.latch: + %indvar11 = add nuw nsw i64 %indvar1, 1 + %for1.cond = icmp eq i64 %indvar11, 1000 + br i1 %for1.cond, label %exit, label %for1.header, !llvm.loop !0 + +exit: + ret void +} + +define void @outer_and_inner_loop_dependant_select(double* noalias nocapture %out, double %a, double %b) { +; CHECK-LABEL: @outer_and_inner_loop_dependant_select( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ] +; 
CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], <4 x i64> [[VEC_IND]] +; CHECK-NEXT: br label [[FOR2_HEADER1:%.*]] +; CHECK: for2.header1: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR2_HEADER1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP3]], <4 x double*> [[TMP0]], i32 8, <4 x i1> ) +entry: + br label %for1.header + +for1.header: + %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ] + %ptr = getelementptr inbounds double, double* %out, i64 %indvar1 + br label %for2.header + +for2.header: + %indvar2 = phi i64 [ 0, %for1.header ], [ %indvar21, %for2.header ] + %sum = add nuw nsw i64 %indvar1, %indvar2 + %select = trunc i64 %sum to i1 + ; Select condition depends on both inner and outer loop iteration variables. 
+ %select.b = select i1 %select, double %a, double %b + store double %select.b, double* %ptr, align 8 + %indvar21 = add nuw nsw i64 %indvar2, 1 + %for2.cond = icmp eq i64 %indvar21, 10000 + br i1 %for2.cond, label %for1.latch, label %for2.header + +for1.latch: + %indvar11 = add nuw nsw i64 %indvar1, 1 + %for1.cond = icmp eq i64 %indvar11, 1000 + br i1 %for1.cond, label %exit, label %for1.header, !llvm.loop !0 + +exit: + ret void +} +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -131,7 +131,7 @@ LoopVectorizationLegality::InductionList Inductions; SmallPtrSet DeadInstructions; VPlanTransforms::VPInstructionsToVPRecipes(LI->getLoopFor(LoopHeader), Plan, - Inductions, DeadInstructions); + Inductions, DeadInstructions, *SE); } TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { @@ -161,7 +161,7 @@ LoopVectorizationLegality::InductionList Inductions; SmallPtrSet DeadInstructions; VPlanTransforms::VPInstructionsToVPRecipes(LI->getLoopFor(LoopHeader), Plan, - Inductions, DeadInstructions); + Inductions, DeadInstructions, *SE); VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock(); EXPECT_NE(nullptr, Entry->getSingleSuccessor()); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -17,6 +17,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Dominators.h" #include "llvm/Support/SourceMgr.h" @@ -28,12 +29,23 @@ /// given loop entry block. 
class VPlanTestBase : public testing::Test { protected: + TargetLibraryInfoImpl TLII; + TargetLibraryInfo TLI; + DataLayout DL; + std::unique_ptr Ctx; std::unique_ptr M; std::unique_ptr LI; std::unique_ptr DT; + std::unique_ptr AC; + std::unique_ptr SE; - VPlanTestBase() : Ctx(new LLVMContext) {} + VPlanTestBase() + : TLII(), TLI(TLII), + DL("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-" + "f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:" + "16:32:64-S128"), + Ctx(new LLVMContext) {} Module &parseModule(const char *ModuleString) { SMDiagnostic Err; @@ -45,6 +57,8 @@ void doAnalysis(Function &F) { DT.reset(new DominatorTree(F)); LI.reset(new LoopInfo(*DT)); + AC.reset(new AssumptionCache(F)); + SE.reset(new ScalarEvolution(F, TLI, *AC, *DT, *LI)); } VPlanPtr buildHCFG(BasicBlock *LoopHeader) {