Index: clang/test/Frontend/optimization-remark-analysis.c =================================================================== --- clang/test/Frontend/optimization-remark-analysis.c +++ clang/test/Frontend/optimization-remark-analysis.c @@ -1,8 +1,8 @@ // RUN: %clang -O1 -fvectorize -target x86_64-unknown-unknown -emit-llvm -Rpass-analysis -S %s -o - 2>&1 | FileCheck %s --check-prefix=RPASS // RUN: %clang -O1 -fvectorize -target x86_64-unknown-unknown -emit-llvm -S %s -o - 2>&1 | FileCheck %s -// RPASS: {{.*}}:7:8: remark: loop not vectorized: loop contains a switch statement -// CHECK-NOT: {{.*}}:7:8: remark: loop not vectorized: loop contains a switch statement +// RPASS: {{.*}}:7:8: remark: loop not vectorized: value that could not be identified as reduction is used outside the loop +// CHECK-NOT: {{.*}}:7:8: remark: loop not vectorized: value that could not be identified as reduction is used outside the loop double foo(int N, int *Array) { double v = 0.0; Index: llvm/include/llvm/Transforms/Utils/LowerSwitch.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LowerSwitch.h +++ llvm/include/llvm/Transforms/Utils/LowerSwitch.h @@ -18,7 +18,13 @@ #include "llvm/IR/PassManager.h" namespace llvm { +class LoopInfo; + struct LowerSwitchPass : public PassInfoMixin { + bool LoopUnswitch; + LoopInfo *LI = nullptr; + LowerSwitchPass() : LoopUnswitch(false) {} + LowerSwitchPass(bool LoopUnswitch) : LoopUnswitch(LoopUnswitch) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; } // namespace llvm Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -1203,6 +1203,7 @@ /// TODO: Should LTO cause any differences to this set of passes? 
void PassBuilder::addVectorPasses(OptimizationLevel Level, FunctionPassManager &FPM, bool IsFullLTO) { + FPM.addPass(LowerSwitchPass(true)); FPM.addPass(LoopVectorizePass( LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); Index: llvm/lib/Transforms/Scalar/StructurizeCFG.cpp =================================================================== --- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -348,7 +348,6 @@ AU.addRequiredID(LowerSwitchID); AU.addRequired(); - AU.addPreserved(); RegionPass::getAnalysisUsage(AU); } }; Index: llvm/lib/Transforms/Utils/FixIrreducible.cpp =================================================================== --- llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -90,8 +90,6 @@ AU.addRequired(); AU.addRequired(); AU.addPreservedID(LowerSwitchID); - AU.addPreserved(); - AU.addPreserved(); } bool runOnFunction(Function &F) override; Index: llvm/lib/Transforms/Utils/LowerSwitch.cpp =================================================================== --- llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -32,6 +33,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" @@ -49,6 +51,10 @@ #define DEBUG_TYPE "lower-switch" +static cl::opt + ForceLoopUnswitch("force-loop-unswitch", cl::Hidden, cl::init(false), + cl::desc("Unswitch simple switches in loops")); + namespace { struct IntRange { @@ -106,6 +112,45 @@ return O << "]"; } +namespace { 
+class LowerSwitch { + +private: + LoopInfo *LI; + bool LoopUnswitch; + +public: + LowerSwitch(LoopInfo *LI, bool &LoopUnswitch) + : LI(LI), LoopUnswitch(LoopUnswitch) {} + + bool run(); + + void FixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, + const unsigned NumMergedCases); + + BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound, + ConstantInt *UpperBound, BasicBlock *OrigBlock, + BasicBlock *Default); + + BasicBlock *SimpleSwitchConvert(SwitchInst *SI, BasicBlock *OrigBlock, + BasicBlock *DefaultBlock); + + BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, BasicBlock *OrigBlock, + BasicBlock *Default, + const std::vector &UnreachableRanges); + + unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); + + void ProcessSwitchInst(SwitchInst *SI, + SmallPtrSetImpl &DeleteList, + AssumptionCache *AC, LazyValueInfo *LVI); + + bool LowerSwitches(Function &F, LazyValueInfo *LVI, AssumptionCache *AC); +}; +} // namespace + /// Update the first occurrence of the "switch statement" BB in the PHI /// node with the "new" BB. The other occurrences will: /// @@ -116,7 +161,7 @@ /// 2) Removed if subsequent incoming values now share the same case, i.e., /// multiple outcome edges are condensed into one. This is necessary to keep the /// number of phi values equal to the number of branches to SuccBB. -void FixPhis( +void LowerSwitch::FixPhis( BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, const unsigned NumMergedCases = std::numeric_limits::max()) { for (BasicBlock::iterator I = SuccBB->begin(), @@ -153,7 +198,7 @@ /// switch's value == the case's value. If not, then it jumps to the default /// branch. At this point in the tree, the value can't be another valid case /// value, so the jump to the "default" branch is warranted. 
-BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound, +BasicBlock *LowerSwitch::NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound, ConstantInt *UpperBound, BasicBlock *OrigBlock, BasicBlock *Default) { Function *F = OrigBlock->getParent(); @@ -213,16 +258,32 @@ return NewLeaf; } +BasicBlock *LowerSwitch::SimpleSwitchConvert(SwitchInst *SI, + BasicBlock *OrigBlock, + BasicBlock *DefaultBlock) { + BasicBlock *FalseDest = DefaultBlock; + + for (auto CI : SI->cases()) { + BasicBlock *TrueDest = CI.getCaseSuccessor(); + CaseRange Case = CaseRange(CI.getCaseValue(), CI.getCaseValue(), TrueDest); + FalseDest = NewLeafBlock(Case, SI->getCondition(), Case.Low, Case.High, + OrigBlock, FalseDest); + } + + return FalseDest; +} + /// Convert the switch statement into a binary lookup of the case values. /// The function recursively builds this tree. LowerBound and UpperBound are /// used to keep track of the bounds for Val that have already been checked by /// a block emitted by one of the previous calls to switchConvert in the call /// stack. -BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, - ConstantInt *UpperBound, Value *Val, - BasicBlock *Predecessor, BasicBlock *OrigBlock, - BasicBlock *Default, - const std::vector &UnreachableRanges) { +BasicBlock * +LowerSwitch::SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, BasicBlock *OrigBlock, + BasicBlock *Default, + const std::vector &UnreachableRanges) { assert(LowerBound && UpperBound && "Bounds must be initialized"); unsigned Size = End - Begin; @@ -301,7 +362,7 @@ /// Transform simple list of \p SI's cases into list of CaseRange's \p Cases. /// \post \p Cases wouldn't contain references to \p SI's default BB. /// \returns Number of \p SI's cases that do not reference \p SI's default BB. 
-unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) { +unsigned LowerSwitch::Clusterify(CaseVector &Cases, SwitchInst *SI) { unsigned NumSimpleCases = 0; // Start with "simple" cases @@ -342,9 +403,9 @@ /// Replace the specified switch instruction with a sequence of chained if-then /// insts in a balanced binary search. -void ProcessSwitchInst(SwitchInst *SI, - SmallPtrSetImpl &DeleteList, - AssumptionCache *AC, LazyValueInfo *LVI) { +void LowerSwitch::ProcessSwitchInst(SwitchInst *SI, + SmallPtrSetImpl &DeleteList, + AssumptionCache *AC, LazyValueInfo *LVI) { BasicBlock *OrigBlock = SI->getParent(); Function *F = OrigBlock->getParent(); Value *Val = SI->getCondition(); // The value we are switching on... @@ -374,6 +435,17 @@ return; } + bool SimpleSwitch = true; + for (auto Case : SI->cases()) + if (!SI->findCaseDest(Case.getCaseSuccessor())) + SimpleSwitch = false; + + // If we're running this pass before loop vectorise, we should only + // attempt to convert simple switches which are in a loop + if ((LoopUnswitch || ForceLoopUnswitch) && + (!SimpleSwitch || !LI->getLoopFor(OrigBlock))) + return; + ConstantInt *LowerBound = nullptr; ConstantInt *UpperBound = nullptr; bool DefaultIsUnreachableFromSwitch = false; @@ -418,7 +490,7 @@ std::vector UnreachableRanges; - if (DefaultIsUnreachableFromSwitch) { + if (DefaultIsUnreachableFromSwitch && !(LoopUnswitch || ForceLoopUnswitch)) { DenseMap Popularity; unsigned MaxPop = 0; BasicBlock *PopSucc = nullptr; @@ -500,9 +572,13 @@ F->getBasicBlockList().insert(Default->getIterator(), NewDefault); BranchInst::Create(Default, NewDefault); - BasicBlock *SwitchBlock = - SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, - OrigBlock, OrigBlock, NewDefault, UnreachableRanges); + BasicBlock *SwitchBlock; + if ((LoopUnswitch || ForceLoopUnswitch) && SimpleSwitch) + SwitchBlock = SimpleSwitchConvert(SI, OrigBlock, NewDefault); + else + SwitchBlock = + SwitchConvert(Cases.begin(), Cases.end(), LowerBound, 
UpperBound, Val, + OrigBlock, OrigBlock, NewDefault, UnreachableRanges); // If there are entries in any PHI nodes for the default edge, make sure // to update them as well. @@ -520,7 +596,8 @@ DeleteList.insert(OldDefault); } -bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) { +bool LowerSwitch::LowerSwitches(Function &F, LazyValueInfo *LVI, + AssumptionCache *AC) { bool Changed = false; SmallPtrSet DeleteList; @@ -552,8 +629,10 @@ public: // Pass identification, replacement for typeid static char ID; + bool LoopUnswitch; - LowerSwitchLegacyPass() : FunctionPass(ID) { + LowerSwitchLegacyPass(bool LoopUnswitch = false) + : FunctionPass(ID), LoopUnswitch(LoopUnswitch) { initializeLowerSwitchLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -561,6 +640,8 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); + AU.addRequired(); } }; @@ -575,6 +656,8 @@ "Lower SwitchInst's to branches", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(LowerSwitchLegacyPass, "lowerswitch", "Lower SwitchInst's to branches", false, false) @@ -585,15 +668,19 @@ bool LowerSwitchLegacyPass::runOnFunction(Function &F) { LazyValueInfo *LVI = &getAnalysis().getLVI(); + auto *LI = &getAnalysis().getLoopInfo(); auto *ACT = getAnalysisIfAvailable(); AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr; - return LowerSwitch(F, LVI, AC); + LowerSwitch LS = LowerSwitch(LI, LoopUnswitch); + return LS.LowerSwitches(F, LVI, AC); } PreservedAnalyses LowerSwitchPass::run(Function &F, FunctionAnalysisManager &AM) { LazyValueInfo *LVI = &AM.getResult(F); + LoopInfo *LI = &AM.getResult(F); AssumptionCache *AC = AM.getCachedResult(F); - return LowerSwitch(F, LVI, AC) ? 
PreservedAnalyses::none() - : PreservedAnalyses::all(); + LowerSwitch LS = LowerSwitch(LI, LoopUnswitch); + return LS.LowerSwitches(F, LVI, AC) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); } Index: llvm/lib/Transforms/Utils/UnifyLoopExits.cpp =================================================================== --- llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -40,8 +40,6 @@ AU.addRequired(); AU.addRequired(); AU.addPreservedID(LowerSwitchID); - AU.addPreserved(); - AU.addPreserved(); } bool runOnFunction(Function &F) override; Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -56,6 +56,8 @@ ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: AMDGPU Lower Kernel Arguments ; GCN-O0-NEXT: Lazy Value Information Analysis +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Natural Loop Information ; GCN-O0-NEXT: Lower SwitchInst's to branches ; GCN-O0-NEXT: Lower invoke and unwind, for unwindless code generators ; GCN-O0-NEXT: Remove unreachable blocks from the CFG @@ -65,16 +67,22 @@ ; GCN-O0-NEXT: Legacy Divergence Analysis ; GCN-O0-NEXT: Unify divergent function exit nodes ; GCN-O0-NEXT: Lazy Value Information Analysis +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Natural Loop Information ; GCN-O0-NEXT: Lower SwitchInst's to branches ; GCN-O0-NEXT: Dominator Tree Construction ; GCN-O0-NEXT: Natural Loop Information ; GCN-O0-NEXT: Convert irreducible control-flow into natural loops +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Natural Loop Information ; GCN-O0-NEXT: Fixup each natural loop to have a single exit block +; GCN-O0-NEXT: Dominator Tree Construction ; GCN-O0-NEXT: Post-Dominator Tree Construction ; GCN-O0-NEXT: Dominance Frontier Construction ; GCN-O0-NEXT: Detect single entry single exit regions ; 
GCN-O0-NEXT: Region Pass Manager ; GCN-O0-NEXT: Structurize control flow +; GCN-O0-NEXT: Dominator Tree Construction ; GCN-O0-NEXT: Post-Dominator Tree Construction ; GCN-O0-NEXT: Natural Loop Information ; GCN-O0-NEXT: Legacy Divergence Analysis @@ -223,6 +231,8 @@ ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: CodeGen Prepare ; GCN-O1-NEXT: Lazy Value Information Analysis +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: Lower SwitchInst's to branches ; GCN-O1-NEXT: Lower invoke and unwind, for unwindless code generators ; GCN-O1-NEXT: Remove unreachable blocks from the CFG @@ -241,16 +251,22 @@ ; GCN-O1-NEXT: Legacy Divergence Analysis ; GCN-O1-NEXT: Unify divergent function exit nodes ; GCN-O1-NEXT: Lazy Value Information Analysis +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: Lower SwitchInst's to branches ; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: Convert irreducible control-flow into natural loops +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: Fixup each natural loop to have a single exit block +; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Post-Dominator Tree Construction ; GCN-O1-NEXT: Dominance Frontier Construction ; GCN-O1-NEXT: Detect single entry single exit regions ; GCN-O1-NEXT: Region Pass Manager ; GCN-O1-NEXT: Structurize control flow +; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Post-Dominator Tree Construction ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: Legacy Divergence Analysis @@ -517,16 +533,22 @@ ; GCN-O1-OPTS-NEXT: Legacy Divergence Analysis ; GCN-O1-OPTS-NEXT: Unify divergent function exit nodes ; GCN-O1-OPTS-NEXT: Lazy Value Information Analysis +; GCN-O1-OPTS-NEXT: Dominator Tree Construction +; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: Lower SwitchInst's to 
branches ; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: Convert irreducible control-flow into natural loops +; GCN-O1-OPTS-NEXT: Dominator Tree Construction +; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: Fixup each natural loop to have a single exit block +; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Dominance Frontier Construction ; GCN-O1-OPTS-NEXT: Detect single entry single exit regions ; GCN-O1-OPTS-NEXT: Region Pass Manager ; GCN-O1-OPTS-NEXT: Structurize control flow +; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: Legacy Divergence Analysis @@ -801,16 +823,22 @@ ; GCN-O2-NEXT: Legacy Divergence Analysis ; GCN-O2-NEXT: Unify divergent function exit nodes ; GCN-O2-NEXT: Lazy Value Information Analysis +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Lower SwitchInst's to branches ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Convert irreducible control-flow into natural loops +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Fixup each natural loop to have a single exit block +; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Post-Dominator Tree Construction ; GCN-O2-NEXT: Dominance Frontier Construction ; GCN-O2-NEXT: Detect single entry single exit regions ; GCN-O2-NEXT: Region Pass Manager ; GCN-O2-NEXT: Structurize control flow +; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Post-Dominator Tree Construction ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Legacy Divergence Analysis @@ -1100,16 +1128,22 @@ ; GCN-O3-NEXT: Legacy Divergence Analysis ; GCN-O3-NEXT: Unify divergent function exit nodes ; 
GCN-O3-NEXT: Lazy Value Information Analysis +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Lower SwitchInst's to branches ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Convert irreducible control-flow into natural loops +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Fixup each natural loop to have a single exit block +; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Post-Dominator Tree Construction ; GCN-O3-NEXT: Dominance Frontier Construction ; GCN-O3-NEXT: Detect single entry single exit regions ; GCN-O3-NEXT: Region Pass Manager ; GCN-O3-NEXT: Structurize control flow +; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Post-Dominator Tree Construction ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Legacy Divergence Analysis Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -216,6 +216,8 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LoopDistributePass ; CHECK-O-NEXT: Running pass: InjectTLIMappings +; CHECK-O-NEXT: Running pass: LowerSwitchPass +; CHECK-O-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O-NEXT: Running pass: LoopVectorizePass ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis ; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis Index: llvm/test/Other/new-pm-lto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-lto-defaults.ll +++ llvm/test/Other/new-pm-lto-defaults.ll @@ -105,6 +105,7 @@ ; CHECK-O23SZ-NEXT: Running pass: LoopDeletionPass on Loop ; CHECK-O23SZ-NEXT: Running pass: LoopFullUnrollPass on Loop ; CHECK-O23SZ-NEXT: Running pass: LoopDistributePass on foo +; CHECK-O23SZ-NEXT: Running pass: LowerSwitchPass on foo ; 
CHECK-O23SZ-NEXT: Running pass: LoopVectorizePass on foo ; CHECK-O23SZ-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; CHECK-O23SZ-NEXT: Running analysis: BranchProbabilityAnalysis on foo Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -197,6 +197,8 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: LoopRotatePass ; CHECK-POSTLINK-O-NEXT: Running pass: LoopDistributePass ; CHECK-POSTLINK-O-NEXT: Running pass: InjectTLIMappings +; CHECK-POSTLINK-O-NEXT: Running pass: LowerSwitchPass +; CHECK-POSTLINK-O-NEXT: Running analysis: LazyValueAnalysis ; CHECK-POSTLINK-O-NEXT: Running pass: LoopVectorizePass ; CHECK-POSTLINK-O-NEXT: Running analysis: BlockFrequencyAnalysis ; CHECK-POSTLINK-O-NEXT: Running analysis: BranchProbabilityAnalysis Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -168,6 +168,8 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LoopDistributePass ; CHECK-O-NEXT: Running pass: InjectTLIMappings +; CHECK-O-NEXT: Running pass: LowerSwitchPass +; CHECK-O-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O-NEXT: Running pass: LoopVectorizePass ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -180,6 +180,8 @@ ; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LoopDistributePass ; CHECK-O-NEXT: Running pass: 
InjectTLIMappings +; CHECK-O-NEXT: Running pass: LowerSwitchPass +; CHECK-O-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O-NEXT: Running pass: LoopVectorizePass ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-remove-switches.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-remove-switches.ll @@ -0,0 +1,413 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -O3 -loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve -scalable-vectorization=on -S | FileCheck %s + +define void @switch(i32* noalias %a, i32* noalias %b, i32* noalias %c, i64 %N) #0 { +; CHECK-LABEL: @switch( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP1]], [[N:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to * +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer) +; 
CHECK-NEXT: [[TMP9:%.*]] = icmp eq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 2, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 4, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = select [[TMP8]], shufflevector ( insertelement ( poison, i1 false, i32 0), poison, zeroinitializer), [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = xor [[TMP10]], shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = select [[TMP13]], [[TMP14]], shufflevector ( insertelement ( poison, i1 false, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP11]] to * +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP16]], i32 4, [[TMP15]], poison) +; CHECK-NEXT: [[TMP17:%.*]] = mul nsw [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP18:%.*]] = add nsw [[TMP17]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP20:%.*]] = select [[TMP13]], [[TMP10]], shufflevector ( insertelement ( poison, i1 false, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP19]] to * +; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP21]], i32 4, [[TMP20]], poison) +; CHECK-NEXT: [[TMP22:%.*]] = select [[TMP8]], shufflevector ( insertelement ( poison, i1 false, i32 0), poison, zeroinitializer), [[TMP9]] +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP11]] to * +; CHECK-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP23]], i32 4, [[TMP22]], poison) +; CHECK-NEXT: [[PREDPHI:%.*]] = select 
[[TMP15]], [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD7]] +; CHECK-NEXT: [[PREDPHI8:%.*]] = select [[TMP15]], [[TMP18]], shufflevector ( insertelement ( poison, i32 2, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP24:%.*]] = mul nsw [[PREDPHI]], [[PREDPHI]] +; CHECK-NEXT: [[TMP25:%.*]] = add nsw [[TMP24]], [[PREDPHI8]] +; CHECK-NEXT: [[TMP26:%.*]] = or [[TMP22]], [[TMP15]] +; CHECK-NEXT: [[PREDPHI9:%.*]] = select [[TMP26]], [[TMP25]], shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP27:%.*]] = or [[TMP8]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP19]] to * +; CHECK-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP28]], i32 4, [[TMP27]], poison) +; CHECK-NEXT: [[TMP29:%.*]] = mul nsw [[WIDE_MASKED_LOAD10]], [[PREDPHI9]] +; CHECK-NEXT: [[TMP30:%.*]] = add nsw [[TMP29]], [[PREDPHI9]] +; CHECK-NEXT: [[PREDPHI11:%.*]] = select [[TMP27]], [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD6]] +; CHECK-NEXT: [[PREDPHI12:%.*]] = select [[TMP27]], [[TMP30]], shufflevector ( insertelement ( poison, i32 4, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP31:%.*]] = mul nsw [[PREDPHI11]], [[PREDPHI11]] +; CHECK-NEXT: [[TMP32:%.*]] = add nsw [[TMP31]], [[PREDPHI12]] +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i32* [[TMP6]] to * +; CHECK-NEXT: store [[TMP32]], * [[TMP33]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[I_PH:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: 
[[I:%.*]] = phi i64 [ [[INC:%.*]], [[L4:%.*]] ], [ [[I_PH]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]] +; CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: switch i32 [[TMP35]], label [[L1:%.*]] [ +; CHECK-NEXT: i32 3, label [[L3:%.*]] +; CHECK-NEXT: i32 2, label [[FOR_BODY_L2_CRIT_EDGE:%.*]] +; CHECK-NEXT: i32 4, label [[FOR_BODY_L4_CRIT_EDGE:%.*]] +; CHECK-NEXT: ] +; CHECK: for.body.L4_crit_edge: +; CHECK-NEXT: [[ARRAYIDX17_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[I]] +; CHECK-NEXT: [[DOTPRE1:%.*]] = load i32, i32* [[ARRAYIDX17_PHI_TRANS_INSERT]], align 4 +; CHECK-NEXT: br label [[L4]] +; CHECK: for.body.L2_crit_edge: +; CHECK-NEXT: [[ARRAYIDX7_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]] +; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX7_PHI_TRANS_INSERT]], align 4 +; CHECK-NEXT: br label [[L2:%.*]] +; CHECK: L1: +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP36]], [[TMP35]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[TMP35]] +; CHECK-NEXT: br label [[L2]] +; CHECK: L2: +; CHECK-NEXT: [[TMP37:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY_L2_CRIT_EDGE]] ], [ [[TMP36]], [[L1]] ] +; CHECK-NEXT: [[TMP38:%.*]] = phi i32 [ 2, [[FOR_BODY_L2_CRIT_EDGE]] ], [ [[ADD]], [[L1]] ] +; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP37]], [[TMP37]] +; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL9]], [[TMP38]] +; CHECK-NEXT: br label [[L3]] +; CHECK: L3: +; CHECK-NEXT: [[TMP39:%.*]] = phi i32 [ [[TMP35]], [[FOR_BODY]] ], [ [[ADD11]], [[L2]] ] +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[I]] +; CHECK-NEXT: [[TMP40:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4 +; CHECK-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP40]], [[TMP39]] +; 
CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[MUL14]], [[TMP39]] +; CHECK-NEXT: br label [[L4]] +; CHECK: L4: +; CHECK-NEXT: [[TMP41:%.*]] = phi i32 [ [[DOTPRE1]], [[FOR_BODY_L4_CRIT_EDGE]] ], [ [[TMP40]], [[L3]] ] +; CHECK-NEXT: [[TMP42:%.*]] = phi i32 [ 4, [[FOR_BODY_L4_CRIT_EDGE]] ], [ [[ADD16]], [[L3]] ] +; CHECK-NEXT: [[MUL19:%.*]] = mul nsw i32 [[TMP41]], [[TMP41]] +; CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL19]], [[TMP42]] +; CHECK-NEXT: store i32 [[ADD21]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; + +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %L4 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i + %0 = load i32, i32* %arrayidx + switch i32 %0, label %L1 [ + i32 4, label %L4 + i32 2, label %L2 + i32 3, label %L3 + ] + +L1: + %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i + %1 = load i32, i32* %arrayidx5 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %0 + store i32 %add, i32* %arrayidx + br label %L2 + +L2: + %2 = phi i32 [ 2, %for.body ], [ %add, %L1 ] + %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %i + %3 = load i32, i32* %arrayidx7 + %mul9 = mul nsw i32 %3, %3 + %add11 = add nsw i32 %2, %mul9 + store i32 %add11, i32* %arrayidx + br label %L3 + +L3: + %4 = phi i32 [ 3, %for.body ], [ %add11, %L2 ] + %arrayidx13 = getelementptr inbounds i32, i32* %c, i64 %i + %5 = load i32, i32* %arrayidx13 + %mul14 = mul nsw i32 %5, %4 + %add16 = add nsw i32 %mul14, %4 + store i32 %add16, i32* %arrayidx + br label %L4 + +L4: + %6 = phi i32 [ 4, %for.body ], [ %add16, %L3 ] + %arrayidx17 = getelementptr inbounds i32, i32* %c, i64 %i + %7 = load i32, i32* %arrayidx17 + %mul19 = 
mul nsw i32 %7, %7 + %add21 = add nsw i32 %6, %mul19 + store i32 %add21, i32* %arrayidx + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} + +define void @switch_VF1_UF2(i32* noalias %a, i32* noalias %b, i32* noalias %c, i64 %N) #0 { +; CHECK-LABEL: @switch_VF1_UF2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[INDUCTION3:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <2 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> , <2 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_LOAD_CONTINUE:%.*]], label [[PRED_LOAD_IF:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_CONTINUE6]], 
label [[PRED_LOAD_IF5:%.*]] +; CHECK: pred.load.if5: +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION3]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; CHECK: pred.load.continue6: +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP14]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = mul nsw <2 x i32> [[TMP16]], +; CHECK-NEXT: [[TMP18:%.*]] = add nsw <2 x i32> [[TMP17]], [[TMP6]] +; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> , <2 x i32> [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>* +; CHECK-NEXT: [[TMP22:%.*]] = load <2 x i32>, <2 x i32>* [[TMP21]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = shl nsw <2 x i32> [[TMP22]], +; CHECK-NEXT: [[TMP24:%.*]] = add nsw <2 x i32> [[TMP23]], [[TMP19]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP0]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP24]], <2 x i32>* [[TMP25]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[I_PH:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L3:%.*]] ], [ [[I_PH]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: switch i32 [[TMP27]], label [[FOR_BODY_SWITCH2:%.*]] [ +; CHECK-NEXT: i32 2, label [[L2:%.*]] +; CHECK-NEXT: i32 3, label [[L3]] +; CHECK-NEXT: ] +; CHECK: for.body.switch2: +; CHECK-NEXT: [[ADD:%.*]] = mul nsw i32 [[TMP27]], 3 +; CHECK-NEXT: br label [[L2]] +; CHECK: L2: +; CHECK-NEXT: [[TMP28:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY_SWITCH2]] ], [ [[TMP27]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP29]], 3 +; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[MUL6]], [[TMP28]] +; CHECK-NEXT: br label [[L3]] +; CHECK: L3: +; CHECK-NEXT: [[TMP30:%.*]] = phi i32 [ [[ADD8]], [[L2]] ], [ [[TMP27]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[I]] +; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4 +; CHECK-NEXT: [[MUL10:%.*]] = shl nsw i32 [[TMP31]], 2 +; CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[MUL10]], [[TMP30]] +; CHECK-NEXT: store i32 [[ADD12]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; + +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %L3 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i + %0 = load i32, i32* %arrayidx + %switch = icmp eq i32 %0, 3 + br i1 %switch, label %L3, label %for.body.switch + +for.body.switch: + %switch1 = icmp eq i32 %0, 2 + br i1 %switch1, label %L2, label %for.body.switch2 + +for.body.switch2: + 
%add = mul nsw i32 %0, 3 + store i32 %add, i32* %arrayidx + br label %L2 + +L2: + %1 = phi i32 [ %add, %for.body.switch2 ], [ %0, %for.body.switch ] + %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i + %2 = load i32, i32* %arrayidx5 + %mul6 = mul nsw i32 %2, 3 + %add8 = add nsw i32 %1, %mul6 + store i32 %add8, i32* %arrayidx + br label %L3 + +L3: + %3 = phi i32 [ %0, %for.body ], [ %add8, %L2 ] + %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 %i + %4 = load i32, i32* %arrayidx9 + %mul10 = shl nsw i32 %4, 2 + %add12 = add nsw i32 %3, %mul10 + store i32 %add12, i32* %arrayidx + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +; This loop will not vectorize due to unsafe FP ops, ensure the switch statement is created again in for.body +define float @switch_no_vectorize(i32* noalias %a, i32* noalias %b, i32* noalias %c, float %val, i64 %N) { +; CHECK-LABEL: @switch_no_vectorize( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L3:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SUM_033:%.*]] = phi float [ [[CONV20:%.*]], [[L3]] ], [ 2.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: switch i32 [[TMP0]], label [[L1:%.*]] [ +; CHECK-NEXT: i32 2, label [[L2:%.*]] +; CHECK-NEXT: i32 3, label [[L3]] +; CHECK-NEXT: ] +; CHECK: L1: +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +; CHECK-NEXT: [[CONV4:%.*]] = fpext float [[CONV]] to double +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.000000e+00 +; CHECK-NEXT: [[CONV5:%.*]] = fpext float [[SUM_033]] to double +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[ADD]], [[CONV5]] +; CHECK-NEXT: [[CONV6:%.*]] = fptrunc double [[MUL]] to float +; CHECK-NEXT: br 
label [[L2]] +; CHECK: L2: +; CHECK-NEXT: [[SUM_1:%.*]] = phi float [ [[CONV6]], [[L1]] ], [ [[SUM_033]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4 +; CHECK-NEXT: [[CONV8:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[CONV9:%.*]] = fpext float [[CONV8]] to double +; CHECK-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 2.000000e+00 +; CHECK-NEXT: [[CONV11:%.*]] = fpext float [[SUM_1]] to double +; CHECK-NEXT: [[MUL12:%.*]] = fmul double [[ADD10]], [[CONV11]] +; CHECK-NEXT: [[CONV13:%.*]] = fptrunc double [[MUL12]] to float +; CHECK-NEXT: br label [[L3]] +; CHECK: L3: +; CHECK-NEXT: [[SUM_2:%.*]] = phi float [ [[CONV13]], [[L2]] ], [ [[SUM_033]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 +; CHECK-NEXT: [[CONV15:%.*]] = sitofp i32 [[TMP2]] to float +; CHECK-NEXT: [[CONV16:%.*]] = fpext float [[CONV15]] to double +; CHECK-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 3.000000e+00 +; CHECK-NEXT: [[CONV18:%.*]] = fpext float [[SUM_2]] to double +; CHECK-NEXT: [[MUL19:%.*]] = fmul double [[ADD17]], [[CONV18]] +; CHECK-NEXT: [[CONV20]] = fptrunc double [[MUL19]] to float +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: [[CONV20_LCSSA:%.*]] = phi float [ [[CONV20]], [[L3]] ] +; CHECK-NEXT: ret float [[CONV20_LCSSA]] +; + +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %L3 ], [ 0, %entry ] + %sum.033 = phi float [ %conv20, %L3 ], [ 2.000000e+00, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i + %0 = load i32, i32* %arrayidx + switch i32 %0, label %L1 [ + i32 3, label %L3 + i32 2, label %L2 + ] + 
+L1: + %conv = sitofp i32 %0 to float + %conv4 = fpext float %conv to double + %add = fadd double %conv4, 1.000000e+00 + %conv5 = fpext float %sum.033 to double + %mul = fmul double %add, %conv5 + %conv6 = fptrunc double %mul to float + br label %L2 + +L2: + %sum.1 = phi float [ %conv6, %L1 ], [ %sum.033, %for.body ] + %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %i + %1 = load i32, i32* %arrayidx7 + %conv8 = sitofp i32 %1 to float + %conv9 = fpext float %conv8 to double + %add10 = fadd double %conv9, 2.000000e+00 + %conv11 = fpext float %sum.1 to double + %mul12 = fmul double %add10, %conv11 + %conv13 = fptrunc double %mul12 to float + br label %L3 + +L3: + %sum.2 = phi float [ %conv13, %L2 ], [ %sum.033, %for.body ] + %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 %i + %2 = load i32, i32* %arrayidx14 + %conv15 = sitofp i32 %2 to float + %conv16 = fpext float %conv15 to double + %add17 = fadd double %conv16, 3.000000e+00 + %conv18 = fpext float %sum.2 to double + %mul19 = fmul double %add17, %conv18 + %conv20 = fptrunc double %mul19 to float + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret float %conv20 +} + +!0 = distinct !{!0, !1, !2, !3, !4} +!1 = !{!"llvm.loop.vectorize.width", i32 1} +!2 = !{!"llvm.loop.interleave.count", i32 2} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} +!4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} Index: llvm/test/Transforms/LoopVectorize/remove-switches.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/remove-switches.ll @@ -0,0 +1,467 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -O3 -loop-vectorize -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s +; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS + +; We should not vectorize this loop since we do not have masked loads and 
stores +; CHECK-REMARKS: remark: :0:0: the cost-model indicates that vectorization is not beneficial +define void @switch_cost(i32* noalias %a, i32* noalias readonly %b, i32* noalias readonly %c, i64 %N) #0 { +; CHECK-LABEL: @switch_cost( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L4:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: switch i32 [[TMP0]], label [[L1:%.*]] [ +; CHECK-NEXT: i32 3, label [[L3:%.*]] +; CHECK-NEXT: i32 2, label [[FOR_BODY_L2_CRIT_EDGE:%.*]] +; CHECK-NEXT: i32 4, label [[FOR_BODY_L4_CRIT_EDGE:%.*]] +; CHECK-NEXT: ] +; CHECK: for.body.L4_crit_edge: +; CHECK-NEXT: [[ARRAYIDX17_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[I]] +; CHECK-NEXT: [[DOTPRE1:%.*]] = load i32, i32* [[ARRAYIDX17_PHI_TRANS_INSERT]], align 4 +; CHECK-NEXT: br label [[L4]] +; CHECK: for.body.L2_crit_edge: +; CHECK-NEXT: [[ARRAYIDX7_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[I]] +; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX7_PHI_TRANS_INSERT]], align 4 +; CHECK-NEXT: br label [[L2:%.*]] +; CHECK: L1: +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[TMP0]] +; CHECK-NEXT: br label [[L2]] +; CHECK: L2: +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY_L2_CRIT_EDGE]] ], [ [[TMP1]], [[L1]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 2, [[FOR_BODY_L2_CRIT_EDGE]] ], [ [[ADD]], [[L1]] ] +; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL9]], [[TMP3]] +; CHECK-NEXT: br label [[L3]] +; CHECK: 
L3: +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP0]], [[FOR_BODY]] ], [ [[ADD11]], [[L2]] ] +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[I]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4 +; CHECK-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[MUL14]], [[TMP4]] +; CHECK-NEXT: br label [[L4]] +; CHECK: L4: +; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[DOTPRE1]], [[FOR_BODY_L4_CRIT_EDGE]] ], [ [[TMP5]], [[L3]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ 4, [[FOR_BODY_L4_CRIT_EDGE]] ], [ [[ADD16]], [[L3]] ] +; CHECK-NEXT: [[MUL19:%.*]] = mul nsw i32 [[TMP6]], [[TMP6]] +; CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL19]], [[TMP7]] +; CHECK-NEXT: store i32 [[ADD21]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; + +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %L4 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i + %0 = load i32, i32* %arrayidx + switch i32 %0, label %L1 [ + i32 4, label %L4 + i32 2, label %L2 + i32 3, label %L3 + ] + +L1: + %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i + %1 = load i32, i32* %arrayidx5 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %0 + store i32 %add, i32* %arrayidx + br label %L2 + +L2: + %2 = phi i32 [ 2, %for.body ], [ %add, %L1 ] + %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %i + %3 = load i32, i32* %arrayidx7 + %mul9 = mul nsw i32 %3, %3 + %add11 = add nsw i32 %2, %mul9 + store i32 %add11, i32* %arrayidx + br label %L3 + +L3: + %4 = phi i32 [ 3, %for.body ], [ %add11, %L2 ] + %arrayidx13 = getelementptr inbounds i32, i32* %c, i64 %i + %5 = load i32, i32* %arrayidx13 + %mul14 = mul nsw i32 %5, %4 + %add16 = add nsw i32 %mul14, %4 + 
store i32 %add16, i32* %arrayidx + br label %L4 + +L4: + %6 = phi i32 [ 4, %for.body ], [ %add16, %L3 ] + %arrayidx17 = getelementptr inbounds i32, i32* %c, i64 %i + %7 = load i32, i32* %arrayidx17 + %mul19 = mul nsw i32 %7, %7 + %add21 = add nsw i32 %6, %mul19 + store i32 %add21, i32* %arrayidx + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} + +define void @switch(i32* noalias %a, i32* noalias %b, i64 %N) { +; CHECK-LABEL: @switch( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP14]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER5:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[PREDPHI_OP:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> , <4 x i32> [[PREDPHI_OP]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD4]], [[TMP4]] +; 
CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER5]] +; CHECK: for.body.preheader5: +; CHECK-NEXT: [[I_015_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_015:%.*]] = phi i64 [ [[INC:%.*]], [[L3:%.*]] ], [ [[I_015_PH]], [[FOR_BODY_PREHEADER5]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_015]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: switch i32 [[TMP10]], label [[L1:%.*]] [ +; CHECK-NEXT: i32 2, label [[L2:%.*]] +; CHECK-NEXT: i32 3, label [[L3]] +; CHECK-NEXT: ] +; CHECK: L1: +; CHECK-NEXT: br label [[L2]] +; CHECK: L2: +; CHECK-NEXT: [[R_0:%.*]] = phi i32 [ 12, [[L1]] ], [ 5, [[FOR_BODY]] ] +; CHECK-NEXT: br label [[L3]] +; CHECK: L3: +; CHECK-NEXT: [[R_1:%.*]] = phi i32 [ [[R_0]], [[L2]] ], [ [[TMP10]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ADD4:%.*]] = add nuw nsw i32 [[R_1]], 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I_015]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], [[ADD4]] +; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_015]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 
[[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; + +entry: + %cmp14 = icmp sgt i64 %N, 0 + br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %L3 + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %L3 + %i.015 = phi i64 [ %inc, %L3 ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i.015 + %0 = load i32, i32* %arrayidx + switch i32 %0, label %L1 [ + i32 3, label %L3 + i32 2, label %L2 + ] + +L1: ; preds = %for.body + br label %L2 + +L2: ; preds = %for.body, %L1 + %r.0 = phi i32 [ 12, %L1 ], [ 5, %for.body ] + br label %L3 + +L3: ; preds = %for.body, %L2 + %r.1 = phi i32 [ %r.0, %L2 ], [ 3, %for.body ] + %add4 = add nuw nsw i32 %r.1, 4 + %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i.015 + %1 = load i32, i32* %arrayidx5 + %mul = mul nsw i32 %1, %add4 + store i32 %mul, i32* %arrayidx5 + %inc = add nuw nsw i64 %i.015, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0 +} + +define void @switch_VF1_UF2(i32* noalias %a, i32* noalias readonly %b, i32* noalias readonly %c, i64 %N) { +; CHECK-LABEL: @switch_VF1_UF2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[INDUCTION3:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr 
inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDUCTION3]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP3]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 3 +; CHECK-NEXT: [[DOTNOT9:%.*]] = icmp eq i32 [[TMP3]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw i32 [[TMP2]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i32 [[TMP3]], 3 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP4]], i32 2, i32 [[TMP6]] +; CHECK-NEXT: [[PREDPHI4:%.*]] = select i1 [[TMP5]], i32 2, i32 [[TMP7]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[PRED_LOAD_CONTINUE:%.*]], label [[PRED_LOAD_IF:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: br i1 [[DOTNOT9]], label [[PRED_LOAD_CONTINUE6]], label [[PRED_LOAD_IF5:%.*]] +; CHECK: pred.load.if5: +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION3]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; CHECK: pred.load.continue6: +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i32 [[TMP13]], 3 +; CHECK-NEXT: [[TMP16:%.*]] = add nsw i32 [[TMP14]], [[PREDPHI]] +; CHECK-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP15]], [[PREDPHI4]] +; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[DOTNOT]], i32 3, i32 [[TMP16]] +; CHECK-NEXT: [[PREDPHI8:%.*]] = 
select i1 [[DOTNOT9]], i32 3, i32 [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION3]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP19]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = shl nsw i32 [[TMP20]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = shl nsw i32 [[TMP21]], 2 +; CHECK-NEXT: [[TMP24:%.*]] = add nsw i32 [[TMP22]], [[PREDPHI7]] +; CHECK-NEXT: [[TMP25:%.*]] = add nsw i32 [[TMP23]], [[PREDPHI8]] +; CHECK-NEXT: store i32 [[TMP24]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP25]], i32* [[TMP1]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[I_PH:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L3:%.*]] ], [ [[I_PH]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: switch i32 [[TMP27]], label [[FOR_BODY_SWITCH2:%.*]] [ +; CHECK-NEXT: i32 2, label [[L2:%.*]] +; CHECK-NEXT: i32 3, label [[L3]] +; CHECK-NEXT: ] +; CHECK: for.body.switch2: +; CHECK-NEXT: [[ADD:%.*]] = mul nsw i32 [[TMP27]], 3 +; CHECK-NEXT: br label [[L2]] +; CHECK: L2: +; CHECK-NEXT: [[TMP28:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY_SWITCH2]] ], [ [[TMP27]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr 
inbounds i32, i32* [[B]], i64 [[I]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP29]], 3 +; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[MUL6]], [[TMP28]] +; CHECK-NEXT: br label [[L3]] +; CHECK: L3: +; CHECK-NEXT: [[TMP30:%.*]] = phi i32 [ [[ADD8]], [[L2]] ], [ [[TMP27]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[I]] +; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4 +; CHECK-NEXT: [[MUL10:%.*]] = shl nsw i32 [[TMP31]], 2 +; CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[MUL10]], [[TMP30]] +; CHECK-NEXT: store i32 [[ADD12]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; + +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %L3 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i + %0 = load i32, i32* %arrayidx + %switch = icmp eq i32 %0, 3 + br i1 %switch, label %L3, label %for.body.switch + +for.body.switch: + %switch1 = icmp eq i32 %0, 2 + br i1 %switch1, label %L2, label %for.body.switch2 + +for.body.switch2: + %add = mul nsw i32 %0, 3 + store i32 %add, i32* %arrayidx + br label %L2 + +L2: + %1 = phi i32 [ %add, %for.body.switch2 ], [ %0, %for.body.switch ] + %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i + %2 = load i32, i32* %arrayidx5 + %mul6 = mul nsw i32 %2, 3 + %add8 = add nsw i32 %1, %mul6 + store i32 %add8, i32* %arrayidx + br label %L3 + +L3: + %3 = phi i32 [ %0, %for.body ], [ %add8, %L2 ] + %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 %i + %4 = load i32, i32* %arrayidx9 + %mul10 = shl nsw i32 %4, 2 + %add12 = add nsw i32 %3, %mul10 + store i32 
%add12, i32* %arrayidx + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 + +for.end: + ret void +} + +; This loop will not vectorize due to unsafe FP ops, ensure the switch statement is created again in for.body +define float @switch_no_vectorize(i32* noalias %a, i32* noalias readonly %b, i32* noalias readonly %c, float %val, i64 %N) { +; CHECK-LABEL: @switch_no_vectorize( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L3:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SUM_033:%.*]] = phi float [ [[CONV20:%.*]], [[L3]] ], [ 2.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: switch i32 [[TMP0]], label [[L1:%.*]] [ +; CHECK-NEXT: i32 2, label [[L2:%.*]] +; CHECK-NEXT: i32 3, label [[L3]] +; CHECK-NEXT: ] +; CHECK: L1: +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +; CHECK-NEXT: [[CONV4:%.*]] = fpext float [[CONV]] to double +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.000000e+00 +; CHECK-NEXT: [[CONV5:%.*]] = fpext float [[SUM_033]] to double +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[ADD]], [[CONV5]] +; CHECK-NEXT: [[CONV6:%.*]] = fptrunc double [[MUL]] to float +; CHECK-NEXT: br label [[L2]] +; CHECK: L2: +; CHECK-NEXT: [[SUM_1:%.*]] = phi float [ [[CONV6]], [[L1]] ], [ [[SUM_033]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4 +; CHECK-NEXT: [[CONV8:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[CONV9:%.*]] = fpext float [[CONV8]] to double +; CHECK-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 2.000000e+00 +; CHECK-NEXT: [[CONV11:%.*]] = fpext float [[SUM_1]] to double +; CHECK-NEXT: 
[[MUL12:%.*]] = fmul double [[ADD10]], [[CONV11]] +; CHECK-NEXT: [[CONV13:%.*]] = fptrunc double [[MUL12]] to float +; CHECK-NEXT: br label [[L3]] +; CHECK: L3: +; CHECK-NEXT: [[SUM_2:%.*]] = phi float [ [[CONV13]], [[L2]] ], [ [[SUM_033]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 +; CHECK-NEXT: [[CONV15:%.*]] = sitofp i32 [[TMP2]] to float +; CHECK-NEXT: [[CONV16:%.*]] = fpext float [[CONV15]] to double +; CHECK-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 3.000000e+00 +; CHECK-NEXT: [[CONV18:%.*]] = fpext float [[SUM_2]] to double +; CHECK-NEXT: [[MUL19:%.*]] = fmul double [[ADD17]], [[CONV18]] +; CHECK-NEXT: [[CONV20]] = fptrunc double [[MUL19]] to float +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: [[CONV20_LCSSA:%.*]] = phi float [ [[CONV20]], [[L3]] ] +; CHECK-NEXT: ret float [[CONV20_LCSSA]] +; + +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %L3 ], [ 0, %entry ] + %sum.033 = phi float [ %conv20, %L3 ], [ 2.000000e+00, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i + %0 = load i32, i32* %arrayidx + switch i32 %0, label %L1 [ + i32 3, label %L3 + i32 2, label %L2 + ] + +L1: + %conv = sitofp i32 %0 to float + %conv4 = fpext float %conv to double + %add = fadd double %conv4, 1.000000e+00 + %conv5 = fpext float %sum.033 to double + %mul = fmul double %add, %conv5 + %conv6 = fptrunc double %mul to float + br label %L2 + +L2: + %sum.1 = phi float [ %conv6, %L1 ], [ %sum.033, %for.body ] + %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %i + %1 = load i32, i32* %arrayidx7 + %conv8 = sitofp i32 %1 to float + %conv9 = fpext float %conv8 to double + %add10 = fadd double %conv9, 2.000000e+00 + %conv11 = fpext 
float %sum.1 to double + %mul12 = fmul double %add10, %conv11 + %conv13 = fptrunc double %mul12 to float + br label %L3 + +L3: + %sum.2 = phi float [ %conv13, %L2 ], [ %sum.033, %for.body ] + %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 %i + %2 = load i32, i32* %arrayidx14 + %conv15 = sitofp i32 %2 to float + %conv16 = fpext float %conv15 to double + %add17 = fadd double %conv16, 3.000000e+00 + %conv18 = fpext float %sum.2 to double + %mul19 = fmul double %add17, %conv18 + %conv20 = fptrunc double %mul19 to float + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret float %conv20 +} + +!0 = distinct !{!0, !2, !4, !6} +!1 = distinct !{!1, !3, !5, !6} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.width", i32 1} +!4 = !{!"llvm.loop.interleave.count", i32 1} +!5 = !{!"llvm.loop.interleave.count", i32 2} +!6 = !{!"llvm.loop.vectorize.enable", i1 true} Index: llvm/test/Transforms/LowerSwitch/simple-switches.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LowerSwitch/simple-switches.ll @@ -0,0 +1,250 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -lowerswitch -force-loop-unswitch -S | FileCheck %s +; RUN: opt < %s -lowerswitch -force-loop-unswitch -simplifycfg -S | FileCheck --check-prefix=CHECK-SIMPLIFY-CFG %s + +define void @unswitch(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i64 %N){ +; CHECK-LABEL: @unswitch( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L4:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label [[LEAFBLOCK3:%.*]] +; CHECK: LeafBlock3: +; 
CHECK-NEXT: [[SWITCHLEAF4:%.*]] = icmp eq i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[SWITCHLEAF4]], label [[L3:%.*]], label [[LEAFBLOCK1:%.*]] +; CHECK: LeafBlock1: +; CHECK-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[TMP0]], 2 +; CHECK-NEXT: br i1 [[SWITCHLEAF2]], label [[L2:%.*]], label [[LEAFBLOCK:%.*]] +; CHECK: LeafBlock: +; CHECK-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[SWITCHLEAF]], label [[L4]], label [[NEWDEFAULT:%.*]] +; CHECK: NewDefault: +; CHECK-NEXT: br label [[L1:%.*]] +; +; CHECK-SIMPLIFY-CFG-LABEL: @unswitch( +; CHECK-SIMPLIFY-CFG-NEXT: entry: +; CHECK-SIMPLIFY-CFG-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-SIMPLIFY-CFG: for.body: +; CHECK-SIMPLIFY-CFG-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L4:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-SIMPLIFY-CFG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-SIMPLIFY-CFG-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-SIMPLIFY-CFG-NEXT: switch i32 [[TMP0]], label [[L1:%.*]] [ +; CHECK-SIMPLIFY-CFG-NEXT: i32 3, label [[L3:%.*]] +; CHECK-SIMPLIFY-CFG-NEXT: i32 2, label [[L2:%.*]] +; CHECK-SIMPLIFY-CFG-NEXT: i32 4, label [[L4]] +; CHECK-SIMPLIFY-CFG-NEXT: ] + +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %L4 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i + %0 = load i32, i32* %arrayidx + switch i32 %0, label %L1 [ + i32 4, label %L4 + i32 2, label %L2 + i32 3, label %L3 + ] + +L1: + %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i + %1 = load i32, i32* %arrayidx5 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %0 + store i32 %add, i32* %arrayidx + br label %L2 + +L2: + %2 = phi i32 [ %0, %for.body ], [ %add, %L1 ] + %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %i + %3 = load i32, i32* %arrayidx7, align 4 + %mul9 = mul nsw i32 %3, %3 + %add11 = add nsw i32 %2, %mul9 + store i32 %add11, i32* %arrayidx + br label %L3 + +L3: + %4 = phi i32 [ %0, %for.body ], [ 
%add11, %L2 ] + %arrayidx13 = getelementptr inbounds i32, i32* %c, i64 %i + %5 = load i32, i32* %arrayidx13 + %mul14 = mul nsw i32 %5, %4 + %add16 = add nsw i32 %mul14, %4 + store i32 %add16, i32* %arrayidx + br label %L4 + +L4: + %6 = phi i32 [ %0, %for.body ], [ %add16, %L3 ] + %arrayidx17 = getelementptr inbounds i32, i32* %c, i64 %i + %7 = load i32, i32* %arrayidx17 + %mul19 = mul nsw i32 %7, %7 + %add21 = add nsw i32 %6, %mul19 + store i32 %add21, i32* %arrayidx + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} + +; The switch in this test should not be replaced, as multiple cases have the same destination block. +define dso_local void @switch2(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i64 %N) { +; CHECK-LABEL: @switch2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L3:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: switch i32 [[TMP0]], label [[L1:%.*]] [ +; CHECK-NEXT: i32 4, label [[L3]] +; CHECK-NEXT: i32 2, label [[L2:%.*]] +; CHECK-NEXT: i32 3, label [[L3]] +; CHECK-NEXT: ] +; +; CHECK-SIMPLIFY-CFG-LABEL: @switch2( +; CHECK-SIMPLIFY-CFG-NEXT: entry: +; CHECK-SIMPLIFY-CFG-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-SIMPLIFY-CFG: for.body: +; CHECK-SIMPLIFY-CFG-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L3:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-SIMPLIFY-CFG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-SIMPLIFY-CFG-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-SIMPLIFY-CFG-NEXT: switch i32 [[TMP0]], label [[L1:%.*]] [ +; CHECK-SIMPLIFY-CFG-NEXT: i32 4, label [[L3]] +; CHECK-SIMPLIFY-CFG-NEXT: i32 2, label [[L2:%.*]] +; 
CHECK-SIMPLIFY-CFG-NEXT: i32 3, label [[L3]] +; CHECK-SIMPLIFY-CFG-NEXT: ] + +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %L3 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i + %0 = load i32, i32* %arrayidx + switch i32 %0, label %L1 [ + i32 4, label %L3 + i32 2, label %L2 + i32 3, label %L3 + ] + +L1: + %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i + %1 = load i32, i32* %arrayidx5 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %0 + store i32 %add, i32* %arrayidx + br label %L2 + +L2: + %2 = phi i32 [ %0, %for.body ], [ %add, %L1 ] + %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %i + %3 = load i32, i32* %arrayidx7 + %mul9 = mul nsw i32 %3, %3 + %add11 = add nsw i32 %2, %mul9 + store i32 %add11, i32* %arrayidx + br label %L3 + +L3: + %4 = phi i32 [ %0, %for.body ], [ %0, %for.body ], [ %add11, %L2 ] + %arrayidx13 = getelementptr inbounds i32, i32* %c, i64 %i + %5 = load i32, i32* %arrayidx13 + %mul14 = mul nsw i32 %5, %4 + %add16 = add nsw i32 %mul14, %4 + store i32 %add16, i32* %arrayidx + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} + +define dso_local void @unreachable(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i64 %N) { +; CHECK-LABEL: @unreachable( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L3:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label [[LEAFBLOCK3:%.*]] +; CHECK: LeafBlock3: +; CHECK-NEXT: [[SWITCHLEAF4:%.*]] = icmp eq i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[SWITCHLEAF4]], label [[L3]], label [[LEAFBLOCK1:%.*]] +; CHECK: LeafBlock1: +; CHECK-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[TMP0]], 2 +; 
CHECK-NEXT: br i1 [[SWITCHLEAF2]], label [[L2:%.*]], label [[LEAFBLOCK:%.*]] +; CHECK: LeafBlock: +; CHECK-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[SWITCHLEAF]], label [[L1:%.*]], label [[NEWDEFAULT:%.*]] +; CHECK: NewDefault: +; CHECK-NEXT: br label [[DEFAULT:%.*]] +; CHECK: Default: +; CHECK-NEXT: unreachable +; +; CHECK-SIMPLIFY-CFG-LABEL: @unreachable( +; CHECK-SIMPLIFY-CFG-NEXT: entry: +; CHECK-SIMPLIFY-CFG-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-SIMPLIFY-CFG: for.body: +; CHECK-SIMPLIFY-CFG-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], [[L3:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-SIMPLIFY-CFG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-SIMPLIFY-CFG-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-SIMPLIFY-CFG-NEXT: switch i32 [[TMP0]], label [[DEFAULT:%.*]] [ +; CHECK-SIMPLIFY-CFG-NEXT: i32 3, label [[L3]] +; CHECK-SIMPLIFY-CFG-NEXT: i32 2, label [[L2:%.*]] +; CHECK-SIMPLIFY-CFG-NEXT: i32 4, label [[L1:%.*]] +; CHECK-SIMPLIFY-CFG-NEXT: ] +; CHECK-SIMPLIFY-CFG: Default: +; CHECK-SIMPLIFY-CFG-NEXT: unreachable +; +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %L3 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i + %0 = load i32, i32* %arrayidx + switch i32 %0, label %Default [ + i32 4, label %L1 + i32 2, label %L2 + i32 3, label %L3 + ] + +Default: + unreachable + +L1: + %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i + %1 = load i32, i32* %arrayidx5 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %0 + store i32 %add, i32* %arrayidx + br label %L2 + +L2: + %2 = phi i32 [ %0, %for.body ], [ %add, %L1 ] + %arrayidx7 = getelementptr inbounds i32, i32* %b, i64 %i + %3 = load i32, i32* %arrayidx7 + %mul9 = mul nsw i32 %3, %3 + %add11 = add nsw i32 %2, %mul9 + store i32 %add11, i32* %arrayidx + br label %L3 + +L3: + %4 = phi i32 [ %0, %for.body ], [ %add11, %L2 ] + %arrayidx13 = getelementptr inbounds i32, i32* %c, i64 
%i + %5 = load i32, i32* %arrayidx13 + %mul14 = mul nsw i32 %5, %4 + %add16 = add nsw i32 %mul14, %4 + store i32 %add16, i32* %arrayidx + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} Index: llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll =================================================================== --- llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll +++ llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll @@ -13,32 +13,32 @@ ; CHECK-NEXT: [[PRED11_INV:%.*]] = xor i1 [[PRED11:%.*]], true ; CHECK-NEXT: [[PRED12_INV:%.*]] = xor i1 [[PRED12:%.*]], true ; CHECK-NEXT: [[PRED13_INV:%.*]] = xor i1 [[PRED13:%.*]], true -; CHECK-NEXT: br i1 [[PRED0_INV]], label [[IF_THEN:%.*]], label [[FLOW19:%.*]] -; CHECK: Flow19: +; CHECK-NEXT: br i1 [[PRED0_INV]], label [[IF_THEN:%.*]], label [[FLOW18:%.*]] +; CHECK: Flow18: ; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[FLOW3:%.*]] ], [ true, [[ENTRY:%.*]] ] -; CHECK-NEXT: br i1 [[TMP0]], label [[IF_END:%.*]], label [[FLOW20:%.*]] +; CHECK-NEXT: br i1 [[TMP0]], label [[IF_END:%.*]], label [[FLOW19:%.*]] ; CHECK: if.end: -; CHECK-NEXT: br i1 [[PRED1_INV]], label [[IF_ELSE:%.*]], label [[FLOW18:%.*]] -; CHECK: Flow18: +; CHECK-NEXT: br i1 [[PRED1_INV]], label [[IF_ELSE:%.*]], label [[FLOW17:%.*]] +; CHECK: Flow17: ; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[IF_ELSE]] ], [ true, [[IF_END]] ] ; CHECK-NEXT: br i1 [[TMP1]], label [[IF_THEN7:%.*]], label [[IF_END16:%.*]] ; CHECK: if.then7: ; CHECK-NEXT: br label [[IF_END16]] ; CHECK: if.else: -; CHECK-NEXT: br label [[FLOW18]] -; CHECK: Flow20: +; CHECK-NEXT: br label [[FLOW17]] +; CHECK: Flow19: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: if.end16: -; CHECK-NEXT: br i1 [[PRED2_INV]], label [[IF_THEN39:%.*]], label [[FLOW16:%.*]] -; CHECK: Flow16: +; CHECK-NEXT: br i1 [[PRED2_INV]], label [[IF_THEN39:%.*]], label [[FLOW15:%.*]] +; CHECK: 
Flow15: ; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[FLOW5:%.*]] ], [ true, [[IF_END16]] ] -; CHECK-NEXT: br i1 [[TMP2]], label [[WHILE_COND_PREHEADER:%.*]], label [[FLOW17:%.*]] +; CHECK-NEXT: br i1 [[TMP2]], label [[WHILE_COND_PREHEADER:%.*]], label [[FLOW16:%.*]] ; CHECK: while.cond.preheader: ; CHECK-NEXT: br label [[WHILE_COND:%.*]] -; CHECK: Flow17: -; CHECK-NEXT: br label [[FLOW20]] +; CHECK: Flow16: +; CHECK-NEXT: br label [[FLOW19]] ; CHECK: while.cond: -; CHECK-NEXT: br i1 [[PRED3_INV]], label [[LOR_RHS:%.*]], label [[FLOW12:%.*]] +; CHECK-NEXT: br i1 [[PRED3_INV]], label [[LOR_RHS:%.*]], label [[FLOW11:%.*]] ; CHECK: Flow7: ; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[PRED7:%.*]], [[COND_END61:%.*]] ], [ false, [[IRR_GUARD:%.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[COND_END61]] ], [ true, [[IRR_GUARD]] ] @@ -46,30 +46,30 @@ ; CHECK: cond.true49: ; CHECK-NEXT: br label [[FLOW8]] ; CHECK: Flow8: -; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[COND_TRUE49]] ], [ true, [[FLOW7:%.*]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[PRED4_INV]], [[COND_TRUE49]] ], [ [[TMP3]], [[FLOW7]] ] -; CHECK-NEXT: br i1 [[TMP6]], label [[WHILE_BODY63:%.*]], label [[FLOW9:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ true, [[COND_TRUE49]] ], [ false, [[FLOW7:%.*]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[COND_TRUE49]] ], [ true, [[FLOW7]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[PRED4_INV]], [[COND_TRUE49]] ], [ [[TMP3]], [[FLOW7]] ] +; CHECK-NEXT: br i1 [[TMP7]], label [[WHILE_BODY63:%.*]], label [[FLOW9:%.*]] ; CHECK: while.body63: ; CHECK-NEXT: br i1 [[PRED5_INV]], label [[WHILE_COND47:%.*]], label [[FLOW10:%.*]] ; CHECK: Flow9: -; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ true, [[FLOW10]] ], [ false, [[FLOW8]] ] ; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ false, [[FLOW10]] ], [ [[TMP5]], [[FLOW8]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW10]] ], [ true, [[FLOW8]] ] -; CHECK-NEXT: [[DOTINV11:%.*]] = xor i1 [[TMP7]], true -; CHECK-NEXT: 
[[DOTINV:%.*]] = xor i1 [[TMP8]], true -; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_EXIT_GUARD1:%.*]], label [[IRR_GUARD]] +; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW10]] ], [ [[TMP6]], [[FLOW8]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW10]] ], [ true, [[FLOW8]] ] +; CHECK-NEXT: [[DOTINV:%.*]] = xor i1 [[TMP9]], true +; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP_EXIT_GUARD1:%.*]], label [[IRR_GUARD]] ; CHECK: while.cond47: ; CHECK-NEXT: br label [[FLOW10]] ; CHECK: cond.end61: ; CHECK-NEXT: br label [[FLOW7]] -; CHECK: Flow14: -; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ false, [[FLOW15:%.*]] ], [ true, [[LOOP_EXIT_GUARD1]] ] -; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ [[TMP14:%.*]], [[FLOW15]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD1]] ] -; CHECK-NEXT: br label [[FLOW13:%.*]] +; CHECK: Flow13: +; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[FLOW14:%.*]] ], [ true, [[LOOP_EXIT_GUARD1]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW14]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD1]] ] +; CHECK-NEXT: br label [[FLOW12:%.*]] ; CHECK: if.then69: -; CHECK-NEXT: br label [[FLOW15]] +; CHECK-NEXT: br label [[FLOW14]] ; CHECK: lor.rhs: -; CHECK-NEXT: br label [[FLOW12]] +; CHECK-NEXT: br label [[FLOW11]] ; CHECK: while.end76: ; CHECK-NEXT: br label [[FLOW6:%.*]] ; CHECK: if.then39: @@ -87,39 +87,39 @@ ; CHECK: Flow: ; CHECK-NEXT: br label [[FLOW3]] ; CHECK: Flow3: -; CHECK-NEXT: br label [[FLOW19]] +; CHECK-NEXT: br label [[FLOW18]] ; CHECK: Flow4: ; CHECK-NEXT: br label [[FLOW5]] ; CHECK: Flow5: -; CHECK-NEXT: br label [[FLOW16]] +; CHECK-NEXT: br label [[FLOW15]] ; CHECK: Flow6: -; CHECK-NEXT: br label [[FLOW17]] +; CHECK-NEXT: br label [[FLOW16]] ; CHECK: exit: ; CHECK-NEXT: ret void -; CHECK: Flow12: -; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ false, [[LOR_RHS]] ], [ true, [[WHILE_COND]] ] -; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ [[PRED9:%.*]], [[LOR_RHS]] ], [ [[PRED3]], [[WHILE_COND]] ] -; CHECK-NEXT: br i1 [[TMP13]], label [[IRR_GUARD]], label 
[[FLOW13]] +; CHECK: Flow11: +; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ false, [[LOR_RHS]] ], [ true, [[WHILE_COND]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i1 [ [[PRED9:%.*]], [[LOR_RHS]] ], [ [[PRED3]], [[WHILE_COND]] ] +; CHECK-NEXT: br i1 [[TMP14]], label [[IRR_GUARD]], label [[FLOW12]] ; CHECK: irr.guard: -; CHECK-NEXT: [[GUARD_COND_TRUE49:%.*]] = phi i1 [ [[PRED6:%.*]], [[FLOW9]] ], [ [[TMP12]], [[FLOW12]] ] +; CHECK-NEXT: [[GUARD_COND_TRUE49:%.*]] = phi i1 [ [[PRED6:%.*]], [[FLOW9]] ], [ [[TMP13]], [[FLOW11]] ] ; CHECK-NEXT: [[GUARD_COND_TRUE49_INV:%.*]] = xor i1 [[GUARD_COND_TRUE49]], true ; CHECK-NEXT: br i1 [[GUARD_COND_TRUE49_INV]], label [[COND_END61]], label [[FLOW7]] -; CHECK: Flow15: -; CHECK-NEXT: [[TMP14]] = phi i1 [ [[PRED8:%.*]], [[IF_THEN69:%.*]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD2:%.*]] ] -; CHECK-NEXT: br label [[FLOW14:%.*]] +; CHECK: Flow14: +; CHECK-NEXT: [[TMP15]] = phi i1 [ [[PRED8:%.*]], [[IF_THEN69:%.*]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD2:%.*]] ] +; CHECK-NEXT: br label [[FLOW13:%.*]] ; CHECK: loop.exit.guard: -; CHECK-NEXT: br i1 [[TMP16:%.*]], label [[WHILE_END76:%.*]], label [[FLOW6]] +; CHECK-NEXT: br i1 [[TMP17:%.*]], label [[WHILE_END76:%.*]], label [[FLOW6]] ; CHECK: Flow10: -; CHECK-NEXT: [[TMP15]] = phi i1 [ false, [[WHILE_COND47]] ], [ true, [[WHILE_BODY63]] ] +; CHECK-NEXT: [[TMP16]] = phi i1 [ false, [[WHILE_COND47]] ], [ true, [[WHILE_BODY63]] ] ; CHECK-NEXT: br label [[FLOW9]] -; CHECK: Flow13: -; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP10]], [[FLOW14]] ], [ true, [[FLOW12]] ] -; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[TMP11]], [[FLOW14]] ], [ true, [[FLOW12]] ] -; CHECK-NEXT: br i1 [[TMP17]], label [[LOOP_EXIT_GUARD:%.*]], label [[WHILE_COND]] +; CHECK: Flow12: +; CHECK-NEXT: [[TMP17]] = phi i1 [ [[TMP11]], [[FLOW13]] ], [ true, [[FLOW11]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[TMP12]], [[FLOW13]] ], [ true, [[FLOW11]] ] +; CHECK-NEXT: br i1 [[TMP18]], label [[LOOP_EXIT_GUARD:%.*]], label [[WHILE_COND]] ; CHECK: 
loop.exit.guard1: -; CHECK-NEXT: br i1 [[DOTINV]], label [[LOOP_EXIT_GUARD2]], label [[FLOW14]] +; CHECK-NEXT: br i1 [[DOTINV]], label [[LOOP_EXIT_GUARD2]], label [[FLOW13]] ; CHECK: loop.exit.guard2: -; CHECK-NEXT: br i1 [[DOTINV11]], label [[IF_THEN69]], label [[FLOW15]] +; CHECK-NEXT: br i1 [[TMP8]], label [[IF_THEN69]], label [[FLOW14]] ; entry: br i1 %Pred0, label %if.end, label %if.then