diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h --- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h +++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h @@ -5,6 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +// This file contains two passes: a LoopPass and a FunctionPass. The LoopPass +// applies both trivial and non-trivial LoopUnswitch, while the FunctionPass +// only applies non-trivial LoopUnswitch. +//===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H #define LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H @@ -79,6 +84,16 @@ function_ref MapClassName2PassName); }; +/// A function pass that applies only non-trivial LoopUnswitch and shares its +/// code with the LoopPass implementation above. Non-trivial LoopUnswitch is +/// re-implemented as a FunctionPass because it may need function-level +/// analyses. +class FuncSimpleLoopUnswitchPass + : public PassInfoMixin<FuncSimpleLoopUnswitchPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + /// Create the legacy pass object for the simple loop unswitcher. /// /// See the documentaion for `SimpleLoopUnswitchPass` for details. 
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -359,6 +359,7 @@ FUNCTION_PASS("scalarizer", ScalarizerPass()) FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) FUNCTION_PASS("sccp", SCCPPass()) +FUNCTION_PASS("loop-unswitch-func", FuncSimpleLoopUnswitchPass()) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("slsr", StraightLineStrengthReducePass()) diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1,10 +1,15 @@ -///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===// +///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow +///---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +// This file implements LoopUnswitch for both trivial and non-trivial cases +// +//===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/ADT/DenseMap.h" @@ -3169,6 +3174,101 @@ return PA; } +/// Runs non-trivial loop unswitching on the loops of \p F. Loops are taken +/// from a worklist that also receives the loops created by unswitching. +/// Returns PreservedAnalyses::all() when nothing was changed. +PreservedAnalyses FuncSimpleLoopUnswitchPass::run(Function &F, + FunctionAnalysisManager &AM) { + LoopInfo &LI = AM.getResult<LoopAnalysis>(F); + // A function without loops has nothing to unswitch; return before + // requesting any further analyses. + if (LI.empty()) + return PreservedAnalyses::all(); + + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); + // getMSSA() returns a reference, so MemorySSA is always available here and + // the updater can be constructed unconditionally. + MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); + MemorySSAUpdater MSSAU(&MSSA); + if (VerifyMemorySSA) + MSSA.verifyMemorySSA(); + + // Simplify every top-level loop and form LCSSA before unswitching. + bool Changed = false; + for (Loop *TopLevelLoop : LI) { + Changed |= simplifyLoop(TopLevelLoop, &DT, &LI, &SE, nullptr, &MSSAU, + /*PreserveLCSSA=*/false); + Changed |= formLCSSARecursively(*TopLevelLoop, DT, &LI, &SE); + } + + // Look up the remaining analyses once instead of on every worklist + // iteration. + auto &AC = AM.getResult<AssumptionAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); + + SmallPriorityWorklist<Loop *, 4> Worklist; + appendLoopsToWorklist(LI, Worklist); + while (!Worklist.empty()) { + Loop &L = *Worklist.pop_back_val(); + LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L + << "\n"); + + // Save the current loop name in a variable so that we can report it even + // after it has been deleted. + std::string LoopName = std::string(L.getName()); + + auto UnswitchCB = [&L, &LAM, &LoopName, &Worklist]( + bool CurrentLoopValid, bool PartiallyInvariant, + ArrayRef<Loop *> NewLoops) { + // If we did a non-trivial unswitch, we have added new (cloned) loops. + if (!NewLoops.empty()) + appendLoopsToWorklist(NewLoops, Worklist); + + // If the current loop remains valid, we should revisit it to catch any + // other unswitch opportunities. Otherwise, we need to mark it as deleted. 
+ if (CurrentLoopValid) { + if (PartiallyInvariant) { + // Mark the new loop as partially unswitched, to avoid unswitching on + // the same condition again. + auto &Context = L.getHeader()->getContext(); + MDNode *DisableUnswitchMD = MDNode::get( + Context, + MDString::get(Context, "llvm.loop.unswitch.partial.disable")); + MDNode *NewLoopID = makePostTransformationMetadata( + Context, L.getLoopID(), {"llvm.loop.unswitch.partial"}, + {DisableUnswitchMD}); + L.setLoopID(NewLoopID); + } else + Worklist.insert(&L); + } else + LAM.clear(L, LoopName); + }; + + auto DestroyLoopCB = [&LAM](Loop &L, StringRef Name) { + LAM.clear(L, Name); + }; + Changed |= unswitchLoop(L, DT, LI, AC, AA, TTI, /*Trivial=*/false, + /*NonTrivial=*/true, UnswitchCB, &SE, &MSSAU, + DestroyLoopCB); + } + if (!Changed) + return PreservedAnalyses::all(); + + if (VerifyMemorySSA) + MSSA.verifyMemorySSA(); + + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds. 
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast)); + + auto PA = getLoopPassPreservedAnalyses(); + PA.preserve<MemorySSAAnalysis>(); + return PA; +} + void SimpleLoopUnswitchPass::printPipeline( raw_ostream &OS, function_ref MapClassName2PassName) { static_cast *>(this)->printPipeline( diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll @@ -18,6 +18,10 @@ ; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \ ; RUN: -passes='loop-mssa(simple-loop-unswitch),print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 ; +; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ +; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \ +; RUN: -passes='loop-unswitch-func,print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 +; ; When we relax the candidates part of a multiplier formula ; (unscaled candidates == 4) we start getting some unswitches, ; which leads to siblings multiplier kicking in. 
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll @@ -18,6 +18,10 @@ ; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=8 \ ; RUN: -passes='loop-mssa(simple-loop-unswitch),print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 ; +; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ +; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=8 \ +; RUN: -passes='loop-unswitch-func,print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 +; ; With relaxed candidates multiplier (unscaled candidates == 8) we should allow ; some unswitches to happen until siblings multiplier starts kicking in: ; With relaxed candidates multiplier (unscaled candidates == 8) we should allow @@ -31,6 +35,10 @@ ; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \ ; RUN: -passes='loop-mssa(simple-loop-unswitch),print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP5 ; +; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ +; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \ +; RUN: -passes='loop-unswitch-func,print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP5 +; ; With relaxed candidates multiplier (unscaled candidates == 8) and with relaxed ; siblings multiplier for top-level loops (toplevel-div == 8) we should get ; 2^(num conds) == 2^5 == 32 @@ -44,6 +52,10 @@ ; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \ ; RUN: -passes='loop-mssa(simple-loop-unswitch),print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32 ; +; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ +; RUN: 
-unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \ +; RUN: -passes='loop-unswitch-func,print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32 +; ; Similarly get ; 2^(num conds) == 2^5 == 32 ; copies of the loop when cost multiplier is disabled: diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll @@ -43,6 +43,9 @@ ; RUN: opt < %s -enable-unswitch-cost-multiplier=false \ ; RUN: -passes='loop-mssa(simple-loop-unswitch),print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 ; +; RUN: opt < %s -enable-unswitch-cost-multiplier=false \ +; RUN: -passes='loop-unswitch-func,print' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 +; ; ; Single loop, not unswitched ; LOOP1: Loop at depth 1 containing: diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll @@ -3,6 +3,7 @@ ; RUN: opt -passes='loop(simple-loop-unswitch),verify' -unswitch-threshold=5 -S < %s | FileCheck %s ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -unswitch-threshold=5 -S < %s | FileCheck %s ; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -unswitch-threshold=5 -verify-memoryssa -S < %s | FileCheck %s +; RUN: opt -passes='loop-unswitch-func,verify' -unswitch-threshold=5 -S < %s | FileCheck %s declare void @a() declare void @b() diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll --- 
a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll @@ -2,6 +2,7 @@ ; RUN: opt -freeze-loop-unswitch-cond -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s ; RUN: opt -freeze-loop-unswitch-cond -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s ; RUN: opt -freeze-loop-unswitch-cond -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s +; RUN: opt -freeze-loop-unswitch-cond -passes='loop-unswitch-func,verify' -S < %s | FileCheck %s declare i32 @a() declare i32 @b() diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-markloopasdeleted.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -enable-loop-distribute -passes='loop-distribute,loop-mssa(simple-loop-unswitch),loop-distribute' -o /dev/null -S -debug-pass-manager=verbose 2>&1 | FileCheck %s - +; RUN: opt < %s -enable-loop-distribute -passes='loop-distribute,loop-mssa(simple-loop-unswitch),loop-distribute' -o /dev/null -S -debug-pass-manager=verbose 2>&1 | FileCheck %s --check-prefixes=CHECK,LOOPPASS +; RUN: opt < %s -enable-loop-distribute -passes='loop-distribute,loop-unswitch-func,loop-distribute' -o /dev/null -S -debug-pass-manager=verbose 2>&1 | FileCheck %s --check-prefixes=CHECK,FUNCPASS ; Running loop-distribute will result in LoopAccessAnalysis being required and ; cached in the LoopAnalysisManagerFunctionProxy. @@ -17,8 +17,9 @@ ; SimpleLoopUnswitch not marking the Loop as removed, so we missed clearing ; the analysis caches. ; -; CHECK: Running pass: SimpleLoopUnswitchPass on Loop at depth 1 containing: %loop_begin
,%loop_b,%loop_b_inner,%loop_b_inner_exit,%loop_a,%loop_a_inner,%loop_a_inner_exit,%latch -; CHECK-NEXT: Clearing all analysis results for: loop_a_inner +; LOOPPASS: Running pass: SimpleLoopUnswitchPass on Loop at depth 1 containing: %loop_begin
,%loop_b,%loop_b_inner,%loop_b_inner_exit,%loop_a,%loop_a_inner,%loop_a_inner_exit,%latch +; FUNCPASS: Running pass: FuncSimpleLoopUnswitchPass on test6 +; CHECK: Clearing all analysis results for: loop_a_inner ; When running loop-distribute the second time we can see that loop_a_inner diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-redundant-switch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-redundant-switch.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-redundant-switch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-redundant-switch.ll @@ -2,6 +2,7 @@ ; RUN: opt -passes='simple-loop-unswitch' -disable-output -S < %s ; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -disable-output -S < %s ; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -disable-output -S < %s +; RUN: opt -passes='loop-unswitch-func' -disable-output -S < %s ; This loop shouldn't trigger asserts in SimpleLoopUnswitch. define void @test_redundant_switch(i1* %ptr, i32 %cond) { diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes='simple-loop-unswitch' -S < %s | FileCheck %s +; RUN: opt -passes='loop-unswitch-func' -S < %s | FileCheck %s ; If we try to replace uses of `true` outside of `@foo`, we'll see it here. define i1 @bar() {