Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5814,15 +5814,17 @@ if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end()) LoopInvariantRegs = R.LoopInvariantRegs[pair.first]; - unsigned TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs) / - MaxLocalUsers); - // Don't count the induction variable as interleaved. if (EnableIndVarRegisterHeur) { - TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs - 1) / - std::max(1U, (MaxLocalUsers - 1))); + // Treat the induction variable as a LoopInvariantReg. + assert(MaxLocalUsers); + --MaxLocalUsers; + ++LoopInvariantRegs; + } + if (MaxLocalUsers > 0) { + unsigned TmpIC = llvm::bit_floor( + (TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers); + IC = std::min(IC, TmpIC); } - - IC = std::min(IC, TmpIC); } // Clamp the interleave ranges to reasonable counts. Index: llvm/test/Transforms/LoopVectorize/X86/interleave-count.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/interleave-count.ll +++ llvm/test/Transforms/LoopVectorize/X86/interleave-count.ll @@ -7,11 +7,11 @@ define void @test(ptr %dst, i64 %len) { ; CHECK-LABEL: LV: Checking a loop in 'test' -; CHECK: LV: IC is 8 +; CHECK: LV: IC is 16 ; ; CHECK-LABEL: define void @test ; -; Number of @llvm.masked.scatter() calls is 8. +; Number of @llvm.masked.scatter() calls is 16. ; CHECK: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> @@ -20,6 +20,14 @@ ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; CHECK-NOT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> ; entry: