Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6319,6 +6319,17 @@
   unsigned MinWidth = -1U;
   unsigned MaxWidth = 8;
   const DataLayout &DL = TheFunction->getParent()->getDataLayout();
+  // For in-loop reductions, no element types are added to ElementTypesInLoop
+  // if there are no loads/stores in the loop. In this case, set the maximum
+  // width to be the smallest legal int width, or 32 if there are no legal
+  // widths set, so that a sensible VF is chosen.
+  if (ElementTypesInLoop.empty()) {
+    MaxWidth = 32;
+    Type *SmallestIntType =
+        DL.getSmallestLegalIntType(TheLoop->getHeader()->getContext());
+    if (SmallestIntType)
+      MaxWidth = DL.getTypeSizeInBits(SmallestIntType).getFixedSize();
+  }
   for (Type *T : ElementTypesInLoop) {
     MinWidth = std::min(
         MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize());
Index: llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
@@ -31,3 +31,30 @@
 for.end:
   ret void
 }
+
+; For in-loop reductions with no loads or stores in the loop the default widest
+; type is 32 bits, which allows a sensible VF to be chosen.
+
+; CHECK-LABEL: Checking a loop in "no_loads_stores"
+; CHECK: The Smallest and Widest types: 4294967295 / 32 bits
+; CHECK: Selecting VF: 4
+
+define double @no_loads_stores() {
+entry:
+  br label %for.body
+
+for.body:
+  %s.09 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %conv = sitofp i64 %i.08 to double
+  %mul = fmul double %conv, %conv
+  %add = fadd double %s.09, %mul
+  %inc = add nuw nsw i64 %i.08, 1
+  %exitcond.not = icmp eq i64 %inc, 1234567
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  %.lcssa = phi double [ %add, %for.body ]
+  ret double %.lcssa
+}
+
Index: llvm/test/Transforms/LoopVectorize/X86/funclet.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/funclet.ll
+++ llvm/test/Transforms/LoopVectorize/X86/funclet.ll
@@ -33,7 +33,7 @@
 
 ; CHECK-LABEL: define void @test1(
 ; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null]
-; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
+; CHECK: call <8 x double> @llvm.floor.v8f64(<8 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
 
 declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
 
Index: llvm/test/Transforms/LoopVectorize/pr32859.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/pr32859.ll
+++ llvm/test/Transforms/LoopVectorize/pr32859.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
 
 ; Out of the LCSSA form we could have 'phi i32 [ loop-invariant, %for.inc.2.i ]'
 ; but the IR Verifier requires for PHI one entry for each predecessor of
Index: llvm/test/Transforms/LoopVectorize/pr36983.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/pr36983.ll
+++ llvm/test/Transforms/LoopVectorize/pr36983.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
 
 ; There could be more than one LCSSA PHIs in loop exit block.