diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h --- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h +++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h @@ -18,7 +18,7 @@ namespace llvm { -bool canPeel(Loop *L); +bool canPeel(Loop *L, DominatorTree &DT, LoopInfo &LI); bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); @@ -32,7 +32,7 @@ void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, - unsigned TripCount, DominatorTree &DT, + unsigned TripCount, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE, unsigned Threshold = UINT_MAX); } // end namespace llvm diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -180,17 +180,20 @@ /// are used to establish ordering of the FusionCandidates based on dominance. const DominatorTree *DT; const PostDominatorTree *PDT; + /// Needed to identify if peeling is valid. + LoopInfo *LI; OptimizationRemarkEmitter &ORE; FusionCandidate(Loop *L, const DominatorTree *DT, - const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE, - TTI::PeelingPreferences PP) + const PostDominatorTree *PDT, LoopInfo *LI, + OptimizationRemarkEmitter &ORE, TTI::PeelingPreferences PP) : Preheader(L->getLoopPreheader()), Header(L->getHeader()), ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()), Latch(L->getLoopLatch()), L(L), Valid(true), - GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)), - Peeled(false), DT(DT), PDT(PDT), ORE(ORE) { + GuardBranch(L->getLoopGuardBranch()), PP(PP), + AbleToPeel(canPeel(L, *(const_cast(DT)), *LI)), + Peeled(false), DT(DT), PDT(PDT), LI(LI), ORE(ORE) { // Walk over all blocks in the loop and check for conditions that may // prevent fusion. For each block, walk over all instructions and collect @@ -645,7 +648,7 @@ for (Loop *L : LV) { TTI::PeelingPreferences PP = gatherPeelingPreferences(L, SE, TTI, None, None); - FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP); + FusionCandidate CurrCand(L, &DT, &PDT, &LI, ORE, PP); if (!CurrCand.isEligibleForFusion(SE)) continue; @@ -990,7 +993,7 @@ FuseCounter); FusionCandidate FusedCand( - performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE, + performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, &LI, ORE, FC0Copy.PP); FusedCand.verify(); assert(FusedCand.isEligibleForFusion(SE) && diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -979,7 +979,7 @@ } // 5th priority is loop peeling. - computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold); + computePeelCount(L, LoopSize, PP, TripCount, DT, *LI, SE, UP.Threshold); if (PP.PeelCount) { UP.Runtime = false; UP.Count = 1; diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -77,11 +77,16 @@ static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; // Check whether we are capable of peeling this loop. -bool llvm::canPeel(Loop *L) { +bool llvm::canPeel(Loop *L, DominatorTree &DT, LoopInfo &LI) { // Make sure the loop is in simplified form if (!L->isLoopSimplifyForm()) return false; + // We require the loop to be in LCSSA form when we peel and map values from + // peeled region to their uses outside the loop. + if (!L->isRecursivelyLCSSAForm(DT, LI)) + return false; + // Don't try to peel loops where the latch is not the exiting block. // This can be an indication of two different things: // 1) The loop is not rotated. @@ -359,14 +364,14 @@ // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, - unsigned TripCount, DominatorTree &DT, + unsigned TripCount, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE, unsigned Threshold) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); // Save the PP.PeelCount value set by the target in // TTI.getPeelingPreferences or by the flag -unroll-peel-count. unsigned TargetPeelCount = PP.PeelCount; PP.PeelCount = 0; - if (!canPeel(L)) + if (!canPeel(L, DT, LI)) return; // Only try to peel innermost loops by default. @@ -740,7 +745,7 @@ ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA) { assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); - assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); + assert(canPeel(L, *DT, *LI) && "Attempt to peel a loop which is not peelable?"); LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); diff --git a/llvm/unittests/Transforms/Utils/CMakeLists.txt b/llvm/unittests/Transforms/Utils/CMakeLists.txt --- a/llvm/unittests/Transforms/Utils/CMakeLists.txt +++ b/llvm/unittests/Transforms/Utils/CMakeLists.txt @@ -18,6 +18,7 @@ FunctionComparatorTest.cpp IntegerDivisionTest.cpp LocalTest.cpp + LoopPeelTest.cpp LoopRotationUtilsTest.cpp LoopUtilsTest.cpp ModuleUtilsTest.cpp diff --git a/llvm/unittests/Transforms/Utils/LoopPeelTest.cpp b/llvm/unittests/Transforms/Utils/LoopPeelTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/Transforms/Utils/LoopPeelTest.cpp @@ -0,0 +1,88 @@ +//===- LoopPeelUtilsTest.cpp - Unit tests for LoopRotation utility ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopPeel.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "gtest/gtest.h" + +using namespace llvm; + +static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + std::unique_ptr Mod = parseAssemblyString(IR, Err, C); + if (!Mod) + Err.print("LoopPeelUtilsTest", errs()); + return Mod; +} + +/// This test contains loop that is not in LCSSA form. We cannot peel such +/// loops, unless they are in LCSSA form. +TEST(LoopPeel, NonLCSSAForm) { + LLVMContext C; + + std::unique_ptr M = parseIR(C, + R"( +declare i32 @llvm.experimental.deoptimize.i32(...) + +define i32 @test(i32 * nonnull %a, i64 %x) { +entry: + br label %for.cond1 + +for.cond1: + %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ] + %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ] + %a.idx = getelementptr inbounds i32, i32 *%a, i64 %idx + %val.a.idx = load i32, i32* %a.idx, align 4 + %zero.check = icmp eq i32 %val.a.idx, 0 + br i1 %zero.check, label %deopt.exit, label %for.tail + +for.tail: + %sum.next = add i32 %sum, %val.a.idx + %idx.next = add nuw nsw i64 %idx, 1 + %for.check = icmp ult i64 %idx, %x + br i1 %for.check, label %for.cond1, label %return + +return: + ret i32 %sum + +deopt.exit: + %deopt.val = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ] + ret i32 %deopt.val +})"); + + auto *F = M->getFunction("test"); + DominatorTree DT(*F); + LoopInfo LI(DT); + AssumptionCache AC(*F); + TargetLibraryInfoImpl TLII; + TargetLibraryInfo TLI(TLII); + ScalarEvolution SE(*F, TLI, AC, DT, LI); + + Loop *L = *LI.begin(); + + EXPECT_FALSE(L->isLCSSAForm(DT)); + bool ret = canPeel(L, DT, LI); + // We cannot peel loops that are not in LCSSA form. + EXPECT_FALSE(ret); + formLCSSARecursively(*L, DT, &LI, &SE); + bool ret2 = canPeel(L, DT, LI); + EXPECT_TRUE(ret2); + bool result = + peelLoop(L, 1 /*PeelCount*/, &LI, &SE, &DT, &AC, /*preserveLCSSA*/ true); + EXPECT_TRUE(result); +}