diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -732,6 +732,11 @@ return alignTo(Numerator, Denominator) / Denominator; } +/// Returns the integer nearest(Numerator / Denominator). +inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) { + return (Numerator + (Denominator / 2)) / Denominator; +} + /// Returns the largest uint64_t less than or equal to \p Value and is /// \p Skew mod \p Align. \p Align must be non-zero inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -703,19 +703,20 @@ // To estimate the number of times the loop body was executed, we want to // know the number of times the backedge was taken, vs. the number of times // we exited the loop. - uint64_t TrueVal, FalseVal; - if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) + uint64_t LatchCycleWeight, LatchExitWeight; + if (!LatchBR->extractProfMetadata(LatchCycleWeight, LatchExitWeight)) return None; - if (!TrueVal || !FalseVal) + if (LatchBR->getSuccessor(0) != L->getHeader()) + std::swap(LatchCycleWeight, LatchExitWeight); + + if (!LatchCycleWeight || !LatchExitWeight) return 0; - // Divide the count of the backedge by the count of the edge exiting the loop, - // rounding to nearest. - if (LatchBR->getSuccessor(0) == L->getHeader()) - return (TrueVal + (FalseVal / 2)) / FalseVal; - else - return (FalseVal + (TrueVal / 2)) / TrueVal; + // Trip count is a ratio of loop header block execution weight to loop exit + // weight. + uint64_t HeaderBlockWeight = LatchCycleWeight + LatchExitWeight; + return llvm::divideNearest(HeaderBlockWeight, LatchExitWeight); } bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll @@ -11,7 +11,7 @@ define void @test1(i32 %k) !prof !4 { ; CHECK: Loop Unroll: F[test1] Loop %for.body ; CHECK: PEELING loop %for.body with iteration count 2! -; CHECK: PEELING loop %for.body with iteration count 4! +; CHECK: PEELING loop %for.body with iteration count 5! ; CHECK: llvm.loop.unroll.disable for.body.lr.ph: br label %for.body diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll @@ -5,7 +5,7 @@ ; Regression test for setting the correct idom for exit blocks. ; CHECK: Loop Unroll: F[basic] -; CHECK: PEELING loop %for.body with iteration count 1! +; CHECK: PEELING loop %for.body with iteration count 2! define i32 @basic(i32* %p, i32 %k, i1 %c1, i1 %c2) #0 !prof !3 { entry: diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll @@ -5,7 +5,7 @@ ; Regression test for setting the correct idom for exit blocks. ; CHECK: Loop Unroll: F[basic] -; CHECK: PEELING loop %for.body with iteration count 1! +; CHECK: PEELING loop %for.body with iteration count 2! define i32 @basic(i32* %p, i32 %k, i1 %c1, i1 %c2) #0 !prof !3 { entry: diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -8,7 +8,7 @@ ; All side exits to deopt does not change weigths. ; CHECK: Loop Unroll: F[basic] -; CHECK: PEELING loop %for.body with iteration count 3! +; CHECK: PEELING loop %for.body with iteration count 4! ; CHECK-NO-PEEL-NOT: PEELING loop %for.body ; CHECK-LABEL: @basic ; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15 diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll @@ -9,7 +9,7 @@ ; from the loop, and update the branch weights for the peeled loop properly. ; CHECK: Loop Unroll: F[basic] -; CHECK: PEELING loop %for.body with iteration count 3! +; CHECK: PEELING loop %for.body with iteration count 4! ; CHECK: Loop Unroll: F[optsize] ; CHECK-NOT: PEELING diff --git a/llvm/unittests/Transforms/Utils/CMakeLists.txt b/llvm/unittests/Transforms/Utils/CMakeLists.txt --- a/llvm/unittests/Transforms/Utils/CMakeLists.txt +++ b/llvm/unittests/Transforms/Utils/CMakeLists.txt @@ -15,6 +15,7 @@ FunctionComparatorTest.cpp IntegerDivisionTest.cpp LocalTest.cpp + LoopUtilsTest.cpp SizeOptsTest.cpp SSAUpdaterBulkTest.cpp UnrollLoopTest.cpp diff --git a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp @@ -0,0 +1,118 @@ +//===--------- LoopUtilsTest.cpp - unit tests -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { +struct LoopUtilsPass : public PassInfoMixin { + template LoopUtilsPass(T &&Arg) : Func(std::forward(Arg)) {} + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) { + return Func(L, AM, AR, U); + } + + std::function + Func; +}; + +struct LoopUtilsTest : public testing::Test { + + PassBuilder PB; + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + LLVMContext Context; + std::unique_ptr M; + + LoopUtilsTest() { + // Register all the basic analyses with the managers. + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerFunctionAnalyses(FAM); + PB.registerLoopAnalyses(LAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + } + + void ParseAssembly(const char *IR) { + SMDiagnostic Error; + M = parseAssemblyString(IR, Error, Context); + + std::string errMsg; + raw_string_ostream os(errMsg); + Error.print("", os); + + // A failure here means that the test itself is buggy. + if (!M) + report_fatal_error(os.str().c_str()); + } +}; + +TEST_F(LoopUtilsTest, getLoopEstimatedTripCountTest1) { + ParseAssembly(R"IR( + @a = dso_local global [1024 x i32] zeroinitializer, align 16 + @b = dso_local global [1024 x i32] zeroinitializer, align 16 + define dso_local void @_Z3foov() { + entry: + br label %for.body + for.cond.cleanup: ; preds = %for.body + ret void + for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @b, i64 0, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %1 = trunc i64 %indvars.iv to i32 + %mul = mul nsw i32 %0, %1 + %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @a, i64 0, i64 %indvars.iv + %2 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %2, %mul + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !1} + + !1 = !{!"branch_weights", i32 1, i32 15 })IR"); + + + LoopPassManager LPM; + LPM.addPass(LoopUtilsPass([&](Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) { + auto TC = getLoopEstimatedTripCount(&L); + EXPECT_TRUE(TC.hasValue()); + EXPECT_EQ(*TC, (unsigned)16); + return PreservedAnalyses::all(); + })); + + FunctionPassManager FPM; + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + + ModulePassManager MPM; + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + + MPM.run(*M, MAM); +} +}