Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6079,21 +6079,22 @@ OpenIntervals.insert(I); } + SmallMapVector Invariant; + + for (auto *Inst : LoopInvariants) { + // FIXME: The target might use more than one register for the type + // even in the scalar case. + unsigned Usage = GetRegUsage(Inst->getType(), ElementCount::getFixed(1)); + unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); + Invariant[ClassID] += Usage; + } + + RU.LoopInvariantRegs = Invariant; + for (unsigned i = 0, e = VFs.size(); i < e; ++i) { // Note that elements in this SmallMapVector will be default constructed // as 0. So we can use "Invariant[ClassID] += n" in the code below even if // there is no previous entry for ClassID. - SmallMapVector Invariant; - - for (auto *Inst : LoopInvariants) { - // FIXME: The target might use more than one register for the type - // even in the scalar case. - unsigned Usage = - VFs[i].isScalar() ? 
1 : GetRegUsage(Inst->getType(), VFs[i]); - unsigned ClassID = - TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType()); - Invariant[ClassID] += Usage; - } LLVM_DEBUG({ dbgs() << "LV(REG): VF = " << VFs[i] << '\n'; @@ -6104,6 +6105,14 @@ << TTI.getRegisterClassName(pair.first) << ", " << pair.second << " registers\n"; } + }); + + RU.MaxLocalUsers = MaxUsages[i]; + RUs[i] = RU; + } + + if (VFs.size()) { + LLVM_DEBUG({ dbgs() << "LV(REG): Found invariant usage: " << Invariant.size() << " item\n"; for (const auto &pair : Invariant) { @@ -6112,10 +6121,6 @@ << " registers\n"; } }); - - RU.LoopInvariantRegs = Invariant; - RU.MaxLocalUsers = MaxUsages[i]; - RUs[i] = RU; } return RUs; Index: llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll +++ llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll @@ -175,7 +175,7 @@ ;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers ;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers ;CHECK-PWR8: LV(REG): Found invariant usage: 1 item -;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 1 registers +;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers ;CHECK-PWR9: LV(REG): VF = 1 ;CHECK-PWR9: LV(REG): Found max usage: 2 item Index: llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll +++ llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll @@ -31,22 +31,22 @@ ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-LMUL2: LV(REG): Found max 
usage: 2 item ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-LMUL4: LV(REG): Found max usage: 2 item ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers ; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-LMUL8: LV(REG): Found max usage: 2 item ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers ; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers entry: %conv = zext i32 %size to i64 Index: llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -102,7 +102,7 @@ ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item -; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers +; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class ; CHECK-NEXT: LV: Loop cost is 23 @@ -234,7 +234,7 @@ ; CHECK-NEXT: 
LV(REG): RegisterClass: RISCV::GPRRC, 3 registers ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item -; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers +; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class ; CHECK-NEXT: LV: Loop cost is 23 Index: llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll +++ llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll @@ -26,7 +26,7 @@ ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item -; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers define i32 @test_g(ptr nocapture readonly %a, i32 %n) local_unnamed_addr !dbg !6 { entry: @@ -68,7 +68,7 @@ ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item -; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers define i32 @test(ptr nocapture readonly %a, i32 %n) local_unnamed_addr { entry: Index: llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll +++ llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll @@ -14,7 +14,6 @@ ; CHECK-NEXT: LV(REG): Found max usage: 2 item ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers ; CHECK-NEXT: 
LV(REG): RegisterClass: Generic::VectorRC, 7 registers -; CHECK-NEXT: LV(REG): Found invariant usage: 0 item ; CHECK: LV(REG): VF = 16 ; CHECK-NEXT: LV(REG): Found max usage: 2 item ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers @@ -56,7 +55,6 @@ ; CHECK-NEXT: LV(REG): Found max usage: 2 item ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 7 registers -; CHECK-NEXT: LV(REG): Found invariant usage: 0 item ; CHECK: LV(REG): VF = 16 ; CHECK-NEXT: LV(REG): Found max usage: 2 item ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers Index: llvm/test/Transforms/LoopVectorize/sve-invariant-legalization.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/sve-invariant-legalization.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +;RUN: opt -mtriple arm64-linux -passes=loop-vectorize -mattr=+sve -disable-output %s | FileCheck %s --allow-empty + +; Check that the loop vectorizer doesn't crash while computing register usage with SVE enabled + +@string = internal unnamed_addr constant [5 x i8] c"abcd\00", align 1 +define dso_local i32 @test(ptr %z) local_unnamed_addr { +L.entry: + %0 = load ptr, ptr %z, align 8 + %1 = getelementptr i8, ptr %0, i64 16 + %2 = load i128, ptr %1, align 16 + %3 = icmp slt i128 %2, 1 + br i1 %3, label %return, label %loopbody.preheader + +loopbody.preheader: ;preds = %L.entry + %4 = load ptr, ptr %0, align 8 + br label %loopbody + +loopbody: ;preds = %loopbody.preheader, %loopbody + %a = phi ptr [ %5, %loopbody ], [ %4, %loopbody.preheader ] + %b = phi ptr [ %6, %loopbody ], [ @string, %loopbody.preheader ] + %len_input = phi i128 [ %len, %loopbody ], [ %2, %loopbody.preheader ] + %len = add nsw i128 %len_input, -1 + %5 = getelementptr i8, ptr %a, i64 1 + %6 = getelementptr i8, ptr %b, i64 1 + %7 = load i8, ptr %b, align 1 + store i8 %7, ptr %a, align 4 
+ %.not = icmp eq i128 %len, 0 + br i1 %.not, label %return, label %loopbody + +return: ;preds = %loopbody, %L.entry + ret i32 undef +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}}