diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9532,12 +9532,12 @@
     Value *NewVecOp = State.get(getVecOp(), Part);
     if (VPValue *Cond = getCondOp()) {
       Value *NewCond = State.get(Cond, Part);
-      VectorType *VecTy = cast<VectorType>(NewVecOp->getType());
-      Value *Iden = RdxDesc->getRecurrenceIdentity(
-          Kind, VecTy->getElementType(), RdxDesc->getFastMathFlags());
-      Value *IdenVec =
-          State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
-      Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
+      Type *ScalarTy = NewVecOp->getType()->getScalarType();
+      Value *Iden = RdxDesc->getRecurrenceIdentity(Kind, ScalarTy,
+                                                   RdxDesc->getFastMathFlags());
+      if (State.VF.isVector())
+        Iden = State.Builder.CreateVectorSplat(State.VF, Iden);
+      Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
       NewVecOp = Select;
     }
     Value *NewRed;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave_masked_reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave_masked_reduce.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave_masked_reduce.ll
@@ -0,0 +1,38 @@
+; REQUIRES: asserts
+; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=4 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnu"
+
+; Reproducer for a crash in the VPReductionRecipe's execute method
+; when the VF is scalar and the recipe has a mask/condition.
+; Note the "-prefer-predicate-over-epilogue" flag, this is what causes
+; the reduction recipe to have a condition. The cost-model will select
+; a scalar VF because the target CPU is generic.
+; "-force-vector-width=1" cannot be used because the vectorizer will
+; generate a different VPlan.
+
+define double @loop(i64 %n, ptr noalias nocapture noundef readonly %c) #0 {
+
+; CHECK: REDUCE ir<%sum.next> = ir<%sum> + reduce.fadd (vp<[[ARG:%.*]]>, vp<[[MASK:%.*]]>)
+; CHECK: Executing best plan with VF=1, UF=4
+
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %sum = phi double [ 0.000000e+00, %entry ], [ %sum.next, %for.body ]
+  %arrayidx1 = getelementptr inbounds [8 x double], ptr %c, i64 0, i64 %i
+  %cval = load double, ptr %arrayidx1, align 8
+  %sum.next = fadd double %sum, %cval
+  %i.next = add nuw nsw i64 %i, 1
+  %exitcond.not = icmp eq i64 %i.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %sum.lcssa = phi double [ %sum.next, %for.body ]
+  ret double %sum.lcssa
+}
+
+attributes #0 = { nofree nosync nounwind memory(read, inaccessiblemem: none) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a,-fmv" }