diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/InstructionCost.h" namespace llvm { @@ -123,11 +124,9 @@ Optional UserAllowPartial, Optional UserRuntime, Optional UserUpperBound, Optional UserFullUnrollMaxCount); -unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - bool &NotDuplicatable, bool &Convergent, - const TargetTransformInfo &TTI, - const SmallPtrSetImpl &EphValues, - unsigned BEInsns); +InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls, + bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, + const SmallPtrSetImpl &EphValues, unsigned BEInsns); } // end namespace llvm diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3013,16 +3013,17 @@ unsigned NumInlineCandidates; bool NotDuplicatable; bool Convergent; - unsigned LoopSize = + InstructionCost LoopSizeIC = ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, EphValues, UP.BEInsns); - LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n"); + LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSizeIC << "\n"); // Loop is not unrollable if the loop contains certain instructions. - if (NotDuplicatable || Convergent) { + if (NotDuplicatable || Convergent || !LoopSizeIC.isValid()) { LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n"); return 1; } + unsigned LoopSize = *LoopSizeIC.getValue(); // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might // be able to use it. diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -330,14 +330,23 @@ SmallPtrSet EphValues; CodeMetrics::collectEphemeralValues(L, &AC, EphValues); Loop *SubLoop = L->getSubLoops()[0]; - unsigned InnerLoopSize = + InstructionCost InnerLoopSizeIC = ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable, Convergent, TTI, EphValues, UP.BEInsns); - unsigned OuterLoopSize = + InstructionCost OuterLoopSizeIC = ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, EphValues, UP.BEInsns); - LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n"); - LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n"); + LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSizeIC << "\n"); + LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSizeIC << "\n"); + + if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) { + LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions" + << " with invalid cost.\n"); + return LoopUnrollResult::Unmodified; + } + unsigned InnerLoopSize = *InnerLoopSizeIC.getValue(); + unsigned OuterLoopSize = *OuterLoopSizeIC.getValue(); + if (NotDuplicatable) { LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable " "instructions.\n"); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -663,7 +663,7 @@ } /// ApproximateLoopSize - Approximate the size of the loop. -unsigned llvm::ApproximateLoopSize( +InstructionCost llvm::ApproximateLoopSize( const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl &EphValues, unsigned BEInsns) { @@ -674,9 +674,7 @@ NotDuplicatable = Metrics.notDuplicatable; Convergent = Metrics.convergent; - // FIXME: This will crash for invalid InstructionCost, we should update the - // callers to gracefully bailout in this case. - unsigned LoopSize = *Metrics.NumInsts.getValue(); + InstructionCost LoopSize = Metrics.NumInsts; // Don't allow an estimate of size zero. This would allows unrolling of loops // with huge iteration counts, which is a compile time problem even if it's @@ -684,7 +682,9 @@ // that each loop has at least three instructions (likely a conditional // branch, a comparison feeding that branch, and some kind of loop increment // feeding that comparison instruction). - LoopSize = std::max(LoopSize, BEInsns + 1); + if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1) + // This is an open coded max() on InstructionCost + LoopSize = BEInsns + 1; return LoopSize; } @@ -1191,10 +1191,18 @@ SmallPtrSet EphValues; CodeMetrics::collectEphemeralValues(L, &AC, EphValues); - unsigned LoopSize = + InstructionCost LoopSizeIC = ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, EphValues, UP.BEInsns); - LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSizeIC << "\n"); + + if (!LoopSizeIC.isValid()) { + LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions" + << " with invalid cost.\n"); + return LoopUnrollResult::Unmodified; + } + unsigned LoopSize = *LoopSizeIC.getValue(); + if (NotDuplicatable) { LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"); diff --git a/llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll b/llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -mtriple=riscv64 -loop-unroll | FileCheck %s + +; Demonstrate handling of invalid costs in LoopUnroll. This test uses +; scalable vectors on RISCV w/o +V to create a situation where a construct +; can not be lowered, and is thus invalid regardless of what the target +; does or does not implement in terms of a cost model. + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" +target triple = "riscv64-unknown-unknown" + +define void @invalid(* %p) nounwind ssp { +; CHECK-LABEL: @invalid( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[A:%.*]] = load , * [[P:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = add [[A]], [[A]] +; CHECK-NEXT: store [[B]], * [[P]], align 1 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 10 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a = load , * %p + %b = add %a, %a + store %b, * %p + %inc = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %i.0, 10 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + + +declare void @f()