Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5814,15 +5814,17 @@ if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end()) LoopInvariantRegs = R.LoopInvariantRegs[pair.first]; - unsigned TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs) / - MaxLocalUsers); - // Don't count the induction variable as interleaved. if (EnableIndVarRegisterHeur) { - TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs - 1) / - std::max(1U, (MaxLocalUsers - 1))); + // Treat the induction variable as a LoopInvariantReg. + assert(MaxLocalUsers); + --MaxLocalUsers; + ++LoopInvariantRegs; + } + if (MaxLocalUsers > 0) { + unsigned TmpIC = llvm::bit_floor( + (TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers); + IC = std::min(IC, TmpIC); } - - IC = std::min(IC, TmpIC); } // Clamp the interleave ranges to reasonable counts. Index: llvm/test/Transforms/LoopVectorize/X86/interleave-count.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/interleave-count.ll @@ -0,0 +1,46 @@ +; REQUIRES: asserts +; +; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize -debug-only=loop-accesses \ +; RUN: -mtriple=x86_64-pc_linux -mcpu=cascadelake \ +; RUN: -force-target-max-vector-interleave=16 -force-target-num-vector-regs=16 \ +; RUN: %s 2>&1 | FileCheck %s + +define void @test(double *%dst, i64 %len) { +; CHECK-LABEL: LV: Checking a loop in 'test' +; CHECK: LV: IC is 16 +; +; CHECK-LABEL: define void @test +; +; Number of @llvm.masked.scatter() calls is 16. +; CHECK: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> +; +entry: + %size = shl i64 %len, 3 + %p.end = getelementptr inbounds double, ptr %dst, i64 %size + br label %loop + +loop: + %p = phi double * [%dst, %entry], [%p.next, %loop] + store double -1.000000e+00, ptr %p, align 8 + %p.next = getelementptr inbounds double, ptr %p, i64 8 + %done = icmp eq ptr %p.next, %p.end + br i1 %done, label %exit, label %loop, !prof !{!"branch_weights", i32 1, i32 1000000} + +exit: + ret void +}