Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6079,21 +6079,32 @@ OpenIntervals.insert(I); } + SmallMapVector Invariant; + + for (auto *Inst : LoopInvariants) { + // FIXME: The target might use more than one register for the type + // even in the scalar case. + unsigned Usage = GetRegUsage(Inst->getType(), ElementCount::getFixed(1)); + unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); + Invariant[ClassID] += Usage; + } + + LLVM_DEBUG({ + dbgs() << "LV(REG): Found invariant usage: " << Invariant.size() + << " item\n"; + for (const auto &pair : Invariant) { + dbgs() << "LV(REG): RegisterClass: " + << TTI.getRegisterClassName(pair.first) << ", " << pair.second + << " registers\n"; + } + }); + + RU.LoopInvariantRegs = Invariant; + for (unsigned i = 0, e = VFs.size(); i < e; ++i) { // Note that elements in this SmallMapVector will be default constructed // as 0. So we can use "Invariant[ClassID] += n" in the code below even if // there is no previous entry for ClassID. - SmallMapVector Invariant; - - for (auto *Inst : LoopInvariants) { - // FIXME: The target might use more than one register for the type - // even in the scalar case. - unsigned Usage = - VFs[i].isScalar() ? 
1 : GetRegUsage(Inst->getType(), VFs[i]); - unsigned ClassID = - TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType()); - Invariant[ClassID] += Usage; - } LLVM_DEBUG({ dbgs() << "LV(REG): VF = " << VFs[i] << '\n'; @@ -6104,16 +6115,8 @@ << TTI.getRegisterClassName(pair.first) << ", " << pair.second << " registers\n"; } - dbgs() << "LV(REG): Found invariant usage: " << Invariant.size() - << " item\n"; - for (const auto &pair : Invariant) { - dbgs() << "LV(REG): RegisterClass: " - << TTI.getRegisterClassName(pair.first) << ", " << pair.second - << " registers\n"; - } }); - RU.LoopInvariantRegs = Invariant; RU.MaxLocalUsers = MaxUsages[i]; RUs[i] = RU; } Index: llvm/test/Transforms/LoopVectorize/sve-invariant-legalization.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/sve-invariant-legalization.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +;RUN: opt -mtriple arm64-linux -passes=loop-vectorize -mattr=+sve -disable-output %s | FileCheck %s --allow-empty + +@string = internal unnamed_addr constant [5 x i8] c"abcd\00", align 1 +define dso_local i32 @test(ptr %z) local_unnamed_addr { +L.entry: + %0 = load ptr, ptr %z, align 8 + %1 = getelementptr i8, ptr %0, i64 16 + %2 = load i128, ptr %1, align 16 + %3 = icmp slt i128 %2, 1 + br i1 %3, label %return, label %loopbody.preheader + +loopbody.preheader: ;preds = %L.entry + %4 = load ptr, ptr %0, align 8 + br label %loopbody + +loopbody: ;preds = %loopbody.preheader, %loopbody + %a = phi ptr [ %5, %loopbody ], [ %4, %loopbody.preheader ] + %b = phi ptr [ %6, %loopbody ], [ @string, %loopbody.preheader ] + %len_input = phi i128 [ %len, %loopbody ], [ %2, %loopbody.preheader ] + %len = add nsw i128 %len_input, -1 + %5 = getelementptr i8, ptr %a, i64 1 + %6 = getelementptr i8, ptr %b, i64 1 + %7 = load i8, ptr %b, align 1 + store i8 %7, ptr %a, align 4 + %.not = icmp eq i128 %len, 0 + br i1 
%.not, label %return, label %loopbody + +return: ;preds = %loopbody, %L.entry + ret i32 undef +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}}