diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5814,15 +5814,17 @@ if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end()) LoopInvariantRegs = R.LoopInvariantRegs[pair.first]; - unsigned TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs) / - MaxLocalUsers); - // Don't count the induction variable as interleaved. if (EnableIndVarRegisterHeur) { - TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs - 1) / - std::max(1U, (MaxLocalUsers - 1))); + // Treat the induction variable as a LoopInvariantReg. + assert(MaxLocalUsers); + --MaxLocalUsers; + ++LoopInvariantRegs; + } + if (MaxLocalUsers > 0) { + unsigned TmpIC = llvm::bit_floor( + (TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers); + IC = std::min(IC, TmpIC); } - - IC = std::min(IC, TmpIC); } // Clamp the interleave ranges to reasonable counts. diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-count.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-count.ll --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-count.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-count.ll @@ -5,15 +5,15 @@ ; RUN: -force-target-max-vector-interleave=16 -force-target-num-vector-regs=16 \ ; RUN: %s 2>&1 | FileCheck %s -; Check that the interleave count is limited by 8 even if there is no +; Check that the interleave count is not limited by 8 if there is no ; register use except one induction variable. define void @test(ptr %dst, i64 %size) { ; CHECK-LABEL: LV: Checking a loop in 'test' -; CHECK: LV: IC is 8 +; CHECK: LV: IC is 16 ; ; CHECK-LABEL: define void @test ; -; Number of @llvm.masked.scatter() calls is 8. +; Number of @llvm.masked.scatter() calls is 16. ; CHECK: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> @@ -22,6 +22,14 @@ ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NOT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; entry: