Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6088,11 +6088,40 @@
     for (auto *Inst : LoopInvariants) {
       // FIXME: The target might use more than one register for the type
       // even in the scalar case.
-      unsigned Usage =
-          VFs[i].isScalar() ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
-      unsigned ClassID =
-          TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType());
-      Invariant[ClassID] += Usage;
+      // Classify the in-loop users of Inst: users that stay scalar after
+      // vectorization keep Inst live in a scalar register, all other users
+      // need it in a vector register. Both kinds may be present, in which
+      // case Inst is accounted for once in each register class.
+      bool HasScalarUser = false;
+      bool HasVectorUser = false;
+      for (User *U : cast<Instruction>(Inst)->users()) {
+        // cast<> asserts on an unexpected non-instruction instead of
+        // dereferencing the null pointer a failed dyn_cast<> would return.
+        auto *I = cast<Instruction>(U);
+        if (TheLoop != LI->getLoopFor(I->getParent()))
+          continue;
+        if (isScalarAfterVectorization(I, VFs[i]))
+          HasScalarUser = true;
+        else
+          HasVectorUser = true;
+        // Stop only once both kinds have been seen; bailing out earlier
+        // would make the result depend on the use-list order.
+        if (HasScalarUser && HasVectorUser)
+          break;
+      }
+
+      if (HasScalarUser) {
+        unsigned ClassID =
+            TTI.getRegisterClassForType(/*Vector=*/false, Inst->getType());
+        Invariant[ClassID] +=
+            GetRegUsage(Inst->getType(), ElementCount::getFixed(1));
+      }
+
+      if (HasVectorUser) {
+        unsigned ClassID =
+            TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType());
+        Invariant[ClassID] += GetRegUsage(Inst->getType(), VFs[i]);
+      }
     }
 
     LLVM_DEBUG({
Index: llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
+++ llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
@@ -175,7 +175,7 @@
 ;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
 ;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
 ;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
-;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 1 registers
+;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers
 
 ;CHECK-PWR9: LV(REG): VF = 1
 ;CHECK-PWR9: LV(REG): Found max usage: 2 item
Index: llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -31,22 +31,22 @@
 ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
 ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-LMUL2: LV(REG): Found max usage: 2 item
 ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
 ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
 ; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-LMUL4: LV(REG): Found max usage: 2 item
 ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
 ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
 ; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-LMUL8: LV(REG): Found max usage: 2 item
 ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
 ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
 ; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 
 entry:
   %conv = zext i32 %size to i64
Index: llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -102,7 +102,7 @@
 ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
 ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class
 ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class
 ; CHECK-NEXT: LV: Loop cost is 23
@@ -234,7 +234,7 @@
 ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
 ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class
 ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class
 ; CHECK-NEXT: LV: Loop cost is 23
Index: llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
+++ llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
@@ -26,7 +26,7 @@
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
 ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
+; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers
 
 define i32 @test_g(ptr nocapture readonly %a, i32 %n) local_unnamed_addr !dbg !6 {
 entry:
@@ -68,7 +68,7 @@
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
 ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
+; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers
 
 define i32 @test(ptr nocapture readonly %a, i32 %n) local_unnamed_addr {
 entry:
Index: llvm/test/Transforms/LoopVectorize/sve-invariant-legalization.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/sve-invariant-legalization.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; REQUIRES: asserts, aarch64-registered-target
+;RUN: opt -mtriple arm64-linux -passes=loop-vectorize -mattr=+sve -debug-only=loop-vectorize -disable-output <%s 2>&1 | FileCheck %s
+
+; Check that this doesn't crash while finding register usage
+
+@string = internal unnamed_addr constant [5 x i8] c"abcd\00", align 1
+define dso_local i32 @get_invariant_reg_usage(ptr %z) local_unnamed_addr {
+;CHECK: LV: Checking a loop in 'get_invariant_reg_usage'
+;CHECK: LV(REG): VF = vscale x 16
+;CHECK: LV(REG): Found max usage: 1 item
+;CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 5 registers
+;CHECK: LV(REG): Found invariant usage: 1 item
+;CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 3 registers
+
+L.entry:
+  %0 = load ptr, ptr %z, align 8
+  %1 = getelementptr i8, ptr %0, i64 16
+  %2 = load i128, ptr %1, align 16
+  %3 = icmp slt i128 %2, 1
+  br i1 %3, label %return, label %loopbody.preheader
+
+loopbody.preheader:                               ;preds = %L.entry
+  %4 = load ptr, ptr %0, align 8
+  br label %loopbody
+
+loopbody:                                         ;preds = %loopbody.preheader, %loopbody
+  %a = phi ptr [ %5, %loopbody ], [ %4, %loopbody.preheader ]
+  %b = phi ptr [ %6, %loopbody ], [ @string, %loopbody.preheader ]
+  %len_input = phi i128 [ %len, %loopbody ], [ %2, %loopbody.preheader ]
+  %len = add nsw i128 %len_input, -1
+  %5 = getelementptr i8, ptr %a, i64 1
+  %6 = getelementptr i8, ptr %b, i64 1
+  %7 = load i8, ptr %b, align 1
+  store i8 %7, ptr %a, align 4
+  %.not = icmp eq i128 %len, 0
+  br i1 %.not, label %return, label %loopbody
+
+return:                                           ;preds = %loopbody, %L.entry
+  ret i32 undef
+}