diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9532,12 +9532,12 @@
     Value *NewVecOp = State.get(getVecOp(), Part);
     if (VPValue *Cond = getCondOp()) {
       Value *NewCond = State.get(Cond, Part);
-      VectorType *VecTy = cast<VectorType>(NewVecOp->getType());
-      Value *Iden = RdxDesc->getRecurrenceIdentity(
-          Kind, VecTy->getElementType(), RdxDesc->getFastMathFlags());
-      Value *IdenVec =
-          State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
-      Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
+      Type *ScalarTy = NewVecOp->getType()->getScalarType();
+      Value *Iden = RdxDesc->getRecurrenceIdentity(Kind, ScalarTy,
+                                                   RdxDesc->getFastMathFlags());
+      if (State.VF.isVector())
+        Iden = State.Builder.CreateVectorSplat(State.VF, Iden);
+      Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
       NewVecOp = Select;
     }
     Value *NewRed;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave_masked_reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave_masked_reduce.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave_masked_reduce.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; REQUIRES: asserts
+; RUN: opt -mtriple=aarch64 -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=4 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
+
+; Reproducer for a crash in the VPReductionRecipe's execute method
+; when the VF is scalar and the recipe has a mask/condition.
+; Note the "-prefer-predicate-over-epilogue" flag, this is what causes
+; the reduction recipe to have a condition. The cost-model will select
+; a scalar VF because the target CPU is generic.
+; "-force-vector-width=1" cannot be used because the vectorizer will
+; generate a different VPlan.
+
+; CHECK: REDUCE ir<%sum.next> = ir<%sum> + reduce.fadd (vp<[[ARG:%.*]]>, vp<[[MASK:%.*]]>)
+; CHECK: Executing best plan with VF=1, UF=4
+
+define double @loop(i64 %n, ptr %arr) {
+; CHECK-LABEL: define double @loop
+; CHECK-SAME: (i64 [[N:%.*]], ptr [[ARR:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 3
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE9:%.*]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_LOAD_CONTINUE9]] ]
+; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[VEC_IV1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[VEC_IV2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[VEC_IV3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV1]], [[TRIP_COUNT_MINUS_1]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV2]], [[TRIP_COUNT_MINUS_1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV3]], [[TRIP_COUNT_MINUS_1]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
+; CHECK: pred.load.if:
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [8 x double], ptr [[ARR]], i64 0, i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8
+; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
+; CHECK: pred.load.continue:
+; CHECK-NEXT: [[TMP7:%.*]] = phi double [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]]
+; CHECK: pred.load.if4:
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [8 x double], ptr [[ARR]], i64 0, i64 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP9]], align 8
+; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE5]]
+; CHECK: pred.load.continue5:
+; CHECK-NEXT: [[TMP11:%.*]] = phi double [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF4]] ]
+; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]]
+; CHECK: pred.load.if6:
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [8 x double], ptr [[ARR]], i64 0, i64 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP13]], align 8
+; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE7]]
+; CHECK: pred.load.continue7:
+; CHECK-NEXT: [[TMP15:%.*]] = phi double [ poison, [[PRED_LOAD_CONTINUE5]] ], [ [[TMP14]], [[PRED_LOAD_IF6]] ]
+; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9]]
+; CHECK: pred.load.if8:
+; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [8 x double], ptr [[ARR]], i64 0, i64 [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[TMP17]], align 8
+; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE9]]
+; CHECK: pred.load.continue9:
+; CHECK-NEXT: [[TMP19:%.*]] = phi double [ poison, [[PRED_LOAD_CONTINUE7]] ], [ [[TMP18]], [[PRED_LOAD_IF8]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP0]], double [[TMP7]], double -0.000000e+00
+; CHECK-NEXT: [[TMP21:%.*]] = fadd double [[VEC_PHI]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP1]], double [[TMP11]], double -0.000000e+00
+; CHECK-NEXT: [[TMP23:%.*]] = fadd double [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP2]], double [[TMP15]], double -0.000000e+00
+; CHECK-NEXT: [[TMP25:%.*]] = fadd double [[TMP23]], [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP3]], double [[TMP19]], double -0.000000e+00
+; CHECK-NEXT: [[TMP27]] = fadd double [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [8 x double], ptr [[ARR]], i64 0, i64 [[I]]
+; CHECK-NEXT: [[ARRVAL:%.*]] = load double, ptr [[ARRAYIDX1]], align 8
+; CHECK-NEXT: [[SUM_NEXT]] = fadd double [[SUM]], [[ARRVAL]]
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi double [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret double [[SUM_NEXT_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %sum = phi double [ 0.000000e+00, %entry ], [ %sum.next, %for.body ]
+  %arrayidx1 = getelementptr inbounds [8 x double], ptr %arr, i64 0, i64 %i
+  %arrval = load double, ptr %arrayidx1, align 8
+  %sum.next = fadd double %sum, %arrval
+  %i.next = add nuw nsw i64 %i, 1
+  %exitcond.not = icmp eq i64 %i.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  ret double %sum.next
+}