diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -730,7 +730,7 @@ bool isTypeLegal(Type *Ty) const; /// Returns the estimated number of registers required to represent \p Ty. - InstructionCost getRegUsageForType(Type *Ty) const; + unsigned getRegUsageForType(Type *Ty) const; /// Return true if switches should be turned into lookup tables for the /// target. @@ -1593,7 +1593,7 @@ virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool useAA() = 0; virtual bool isTypeLegal(Type *Ty) = 0; - virtual InstructionCost getRegUsageForType(Type *Ty) = 0; + virtual unsigned getRegUsageForType(Type *Ty) = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; virtual bool shouldBuildRelLookupTables() = 0; @@ -2032,7 +2032,7 @@ } bool useAA() override { return Impl.useAA(); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } - InstructionCost getRegUsageForType(Type *Ty) override { + unsigned getRegUsageForType(Type *Ty) override { return Impl.getRegUsageForType(Ty); } bool shouldBuildLookupTables() override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -312,7 +312,7 @@ bool isTypeLegal(Type *Ty) const { return false; } - InstructionCost getRegUsageForType(Type *Ty) const { return 1; } + unsigned getRegUsageForType(Type *Ty) const { return 1; } bool shouldBuildLookupTables() const { return true; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -382,10 +382,9 @@ return 
getTLI()->isTypeLegal(VT); } - InstructionCost getRegUsageForType(Type *Ty) { - InstructionCost Val = getTLI()->getTypeLegalizationCost(DL, Ty).first; - assert(Val >= 0 && "Negative cost!"); - return Val; + unsigned getRegUsageForType(Type *Ty) { + EVT ETy = getTLI()->getValueType(DL, Ty); + return getTLI()->getNumRegisters(Ty->getContext(), ETy); } InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -473,7 +473,7 @@ return TTIImpl->isTypeLegal(Ty); } -InstructionCost TargetTransformInfo::getRegUsageForType(Type *Ty) const { +unsigned TargetTransformInfo::getRegUsageForType(Type *Ty) const { return TTIImpl->getRegUsageForType(Ty); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -60,7 +60,7 @@ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; - InstructionCost getRegUsageForType(Type *Ty); + unsigned getRegUsageForType(Type *Ty); InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -429,7 +429,7 @@ BaseT::getPeelingPreferences(L, SE, PP); } -InstructionCost RISCVTTIImpl::getRegUsageForType(Type *Ty) { +unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) { TypeSize Size = Ty->getPrimitiveSizeInBits(); if (Ty->isVectorTy()) { if (Size.isScalable() && ST->hasVInstructions()) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5987,16 +5987,10 @@ LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); - // A lambda that gets the register usage for the given type and VF. - const auto &TTICapture = TTI; - auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned { + auto GetRegUsage = [&TTI = TTI](Type *Ty, ElementCount VF) -> unsigned { if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty)) return 0; - InstructionCost::CostType RegUsage = - *TTICapture.getRegUsageForType(VectorType::get(Ty, VF)).getValue(); - assert(RegUsage >= 0 && RegUsage <= std::numeric_limits<unsigned>::max() && - "Nonsensical values for register usage."); - return RegUsage; + return TTI.getRegUsageForType(VectorType::get(Ty, VF)); }; for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll @@ -0,0 +1,57 @@ +; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 < %s | FileCheck %s +; REQUIRES: asserts + +target triple = "aarch64" + +; Test that shows how many registers the loop vectorizer thinks an illegal <VF x i1> will consume. 
+ +; CHECK-LABEL: LV: Checking a loop in 'or_reduction_neon' from +; CHECK: LV(REG): VF = 32 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 72 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers + +define i1 @or_reduction_neon(i32 %arg, ptr %ptr) { +entry: + br label %loop +exit: + ret i1 %reduction_next +loop: + %induction = phi i32 [ 0, %entry ], [ %induction_next, %loop ] + %reduction = phi i1 [ 0, %entry ], [ %reduction_next, %loop ] + %gep = getelementptr inbounds i32, ptr %ptr, i32 %induction + %loaded = load i32, ptr %gep + %i1 = icmp eq i32 %loaded, %induction + %reduction_next = or i1 %i1, %reduction + %induction_next = add nuw i32 %induction, 1 + %cond = icmp eq i32 %induction_next, %arg + br i1 %cond, label %exit, label %loop, !llvm.loop !32 +} + +; CHECK-LABEL: LV: Checking a loop in 'or_reduction_sve' +; CHECK: LV(REG): VF = 64 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 136 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers + +define i1 @or_reduction_sve(i32 %arg, ptr %ptr) vscale_range(2,2) "target-features"="+sve" { +entry: + br label %loop +exit: + ret i1 %reduction_next +loop: + %induction = phi i32 [ 0, %entry ], [ %induction_next, %loop ] + %reduction = phi i1 [ true, %entry ], [ %reduction_next, %loop ] + %gep = getelementptr inbounds i32, ptr %ptr, i32 %induction + %loaded = load i32, ptr %gep + %i1 = icmp eq i32 %loaded, %induction + %reduction_next = or i1 %i1, %reduction + %induction_next = add nuw i32 %induction, 1 + %cond = icmp eq i32 %induction_next, %arg + br i1 %cond, label %exit, label %loop, !llvm.loop !64 +} + +!32 = distinct !{!32, !33} +!33 = !{!"llvm.loop.vectorize.width", i32 32} +!64 = distinct !{!64, !65} +!65 = !{!"llvm.loop.vectorize.width", i32 64} diff --git a/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll 
b/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll @@ -0,0 +1,32 @@ +; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 < %s | FileCheck %s +; REQUIRES: asserts + +target triple = "x86_64" + +; Test that shows how many registers the loop vectorizer thinks an illegal <VF x i1> will consume. + +; CHECK-LABEL: LV: Checking a loop in 'or_reduction_avx' from +; CHECK: LV(REG): VF = 64 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 136 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers + +define i1 @or_reduction_avx(i32 %arg, ptr %ptr) "target-features"="+avx" { +entry: + br label %loop +exit: + ret i1 %reduction_next +loop: + %induction = phi i32 [ 0, %entry ], [ %induction_next, %loop ] + %reduction = phi i1 [ 0, %entry ], [ %reduction_next, %loop ] + %gep = getelementptr inbounds i32, ptr %ptr, i32 %induction + %loaded = load i32, ptr %gep + %i1 = icmp eq i32 %loaded, %induction + %reduction_next = or i1 %i1, %reduction + %induction_next = add nuw i32 %induction, 1 + %cond = icmp eq i32 %induction_next, %arg + br i1 %cond, label %exit, label %loop, !llvm.loop !64 +} + +!64 = distinct !{!64, !65} +!65 = !{!"llvm.loop.vectorize.width", i32 64}