Index: llvm/lib/CodeGen/DFAJumpThreading.cpp =================================================================== --- /dev/null +++ llvm/lib/CodeGen/DFAJumpThreading.cpp @@ -0,0 +1,653 @@ +//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// When a switch statement inside a loop is used to implement a deterministic +// finite automaton, we can jump thread the switch statement reducing number of +// conditional jumps. Currently this does not happen in LLVM jump threading. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "dfa-jump-threading" + +static cl::opt DisableThis("disable-dfa-jump-thread", + cl::desc("Disable DFA jump threading."), + cl::Hidden, cl::init(false)); + +static cl::opt + IsViewCFG("dfa-jump-view-cfg", + cl::desc("View the CFG before DFA Jump Threading"), cl::Hidden, + cl::init(false)); + +static cl::opt MaxPathDepth( + "dfa-max-path-length", + cl::desc("Max number of blocks searched to find threadable path"), + cl::Hidden, cl::init(20)); + +namespace { + +class SelectInstToUnfold { + SelectInst *SI; + PHINode *SIUse; + +public: + SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} + + SelectInst *getInst() { return SI; } + PHINode *getUse() { return SIUse; } + + explicit operator bool() const { return SI && SIUse; } +}; + +void unfold(SelectInstToUnfold SIToUnfold, + std::vector *NewSIsToUnfold, + std::vector *NewBBs); + +class DFAJumpThreading : public FunctionPass { +public: + static char ID; // Pass identification + DFAJumpThreading() : FunctionPass(ID) { + initializeDFAJumpThreadingPass(*PassRegistry::getPassRegistry()); + } + + ~DFAJumpThreading() override = default; + + bool runOnFunction(Function &F) override; + +private: + void unfoldSelectInstrs(const std::vector &SelectInsts) { + std::list Q; + for (SelectInstToUnfold SIToUnfold : SelectInsts) { + Q.push_back(SIToUnfold); + } + + while (!Q.empty()) { + SelectInstToUnfold SIToUnfold = Q.front(); + Q.pop_front(); + + std::vector NewSIsToUnfold; + std::vector NewBBs; + unfold(SIToUnfold, &NewSIsToUnfold, &NewBBs); + + // Put newly discovered select instructions into the work list. + for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) { + Q.push_back(NewSIToUnfold); + } + } + } +}; +} // end anonymous namespace + +char DFAJumpThreading::ID = 0; +INITIALIZE_PASS_BEGIN(DFAJumpThreading, "dfa-jump-threading", + "DFA Jump Threading", false, false) +INITIALIZE_PASS_END(DFAJumpThreading, "dfa-jump-threading", + "DFA Jump Threading", false, false) + +// Public interface to the DFA Jump Threading pass +FunctionPass *llvm::createDFAJumpThreadingPass(const TargetMachine *TM) { + return new DFAJumpThreading(); +} + +namespace { + +/// Unfold the select instruction held in \p SIToUnfold. +/// +/// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly +/// created basic blocks into \p NewBBs. +/// +/// This routine is inspired by CodeGenPrepare::optimizeSelectInst(). +void unfold(SelectInstToUnfold SIToUnfold, + std::vector *NewSIsToUnfold, + std::vector *NewBBs) { + SelectInst *SI = SIToUnfold.getInst(); + PHINode *SIUse = SIToUnfold.getUse(); + BasicBlock *StartBlock = SI->getParent(); + BasicBlock *EndBlock = SIUse->getParent(); + BranchInst *StartBlockTerm = + dyn_cast(StartBlock->getTerminator()); + + assert(StartBlockTerm && StartBlockTerm->isUnconditional()); + assert(SI->hasOneUse()); + + // These are the new basic blocks for the conditional branch. + // At least one will become an actual new basic block. + BasicBlock *TrueBlock = nullptr; + BasicBlock *FalseBlock = nullptr; + BranchInst *TrueBranch = nullptr; + BranchInst *FalseBranch = nullptr; + + // Sink select instructions to be able to unfold them later. + if (SelectInst *SIOp = dyn_cast(SI->getTrueValue())) { + assert(SIOp->hasOneUse()); + TrueBlock = BasicBlock::Create(SI->getContext(), "si.unfold.true", + EndBlock->getParent(), EndBlock); + NewBBs->push_back(TrueBlock); + TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + SIOp->moveBefore(TrueBranch); + NewSIsToUnfold->push_back(SelectInstToUnfold(SIOp, SIUse)); + } + if (SelectInst *SIOp = dyn_cast(SI->getFalseValue())) { + assert(SIOp->hasOneUse()); + FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false", + EndBlock->getParent(), EndBlock); + NewBBs->push_back(FalseBlock); + FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + SIOp->moveBefore(FalseBranch); + NewSIsToUnfold->push_back(SelectInstToUnfold(SIOp, SIUse)); + } + + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new input value to the PHI. + if (!TrueBlock && !FalseBlock) { + FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false", + EndBlock->getParent(), EndBlock); + NewBBs->push_back(FalseBlock); + BranchInst::Create(EndBlock, FalseBlock); + } + + // Insert the real conditional branch based on the original condition. + // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that side of the branch goes to the end block + // directly and the path originates from the start block from the point of + // view of the new PHI. + BasicBlock *TT = nullptr; + BasicBlock *FT = nullptr; + if (!TrueBlock) { + // A triangle pointing right. + TT = EndBlock; + FT = FalseBlock; + TrueBlock = StartBlock; + + // Update the phi node of SI. + for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) { + if (SIUse->getIncomingBlock(Idx) == StartBlock) { + SIUse->setIncomingValue(Idx, SI->getTrueValue()); + } + } + SIUse->addIncoming(SI->getFalseValue(), FalseBlock); + + // Update any other PHI nodes in EndBlock. + for (auto BI = EndBlock->begin(); PHINode *Phi = dyn_cast(BI); + ++BI) { + if (Phi != SIUse) { + Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), FalseBlock); + } + } + } else if (!FalseBlock) { + // A triangle pointing left. + TT = TrueBlock; + FT = EndBlock; + FalseBlock = StartBlock; + + // Update the phi node of SI. + for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) { + if (SIUse->getIncomingBlock(Idx) == StartBlock) { + SIUse->setIncomingValue(Idx, SI->getFalseValue()); + } + } + SIUse->addIncoming(SI->getTrueValue(), TrueBlock); + + // Update any other PHI nodes in EndBlock. + for (auto BI = EndBlock->begin(); PHINode *Phi = dyn_cast(BI); + ++BI) { + if (Phi != SIUse) { + Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), TrueBlock); + } + } + } else { + // A diamond. + TT = TrueBlock; + FT = FalseBlock; + + // Update the phi node of SI. + SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false); + SIUse->addIncoming(SI->getTrueValue(), TrueBlock); + SIUse->addIncoming(SI->getFalseValue(), FalseBlock); + + // Update any other PHI nodes in EndBlock. + for (auto BI = EndBlock->begin(); PHINode *Phi = dyn_cast(BI); + ++BI) { + if (Phi != SIUse) { + Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), TrueBlock); + Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), FalseBlock); + } + } + } + StartBlockTerm->eraseFromParent(); + BranchInst::Create(TT, FT, SI->getCondition(), StartBlock); + + // The select is now dead. + SI->eraseFromParent(); +} + +typedef std::list PathType; +typedef std::list PathsType; +typedef std::set VisitedBlocks; + +inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { + OS << "< "; + for (const BasicBlock *BB : Path) { + OS << BB->getName() << " "; + } + OS << ">"; + return OS; +} + +struct ThreadingPath { + uint64_t getEntryValue() const { return EntryVal; } + void setEntryValue(const ConstantInt *V) { + EntryVal = V->getZExtValue(); + IsEntryValSet = true; + } + bool isEntryValueSet() const { return IsEntryValSet; } + + uint64_t getExitValue() const { return ExitVal; } + void setExitValue(const ConstantInt *V) { + ExitVal = V->getZExtValue(); + IsExitValSet = true; + } + bool isExitValueSet() const { return IsExitValSet; } + + const BasicBlock *getDeterminatorBB() const { return DBB; } + void setDeterminator(const BasicBlock *BB) { DBB = BB; } + + const PathType &getPath() const { return Path; } + void setPath(const PathType &NewPath) { Path = NewPath; } + + void print(raw_ostream &OS) const { + OS << Path << " [ " << EntryVal << ", " << ExitVal << ", " << DBB->getName() + << " ]"; + } + +private: + PathType Path; + uint64_t EntryVal; + uint64_t ExitVal; + const BasicBlock *DBB = nullptr; + bool IsEntryValSet = false; + bool IsExitValSet = false; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { + TPath.print(OS); + return OS; +} + +struct MainSwitch { + MainSwitch(const SwitchInst *SI) : SI(SI) { + if (isPredictable(SI)) + Instr = SI; + } + + virtual ~MainSwitch() = default; + + const SwitchInst *getInstr() const { return Instr; } + const std::vector getSelectInsts() { return SelectInsts; } + + /// Returns true if \p BB is a destination of the MainSwitch that is not + /// default. + bool isNonDefaultDest(const BasicBlock *BB) const { + for (auto Case : dyn_cast(Instr)->cases()) { + if (Case.getCaseSuccessor() == BB) { + return true; + } + } + return false; + } + + const ConstantInt *getValueFor(const BasicBlock *BB) const { + for (auto Case : dyn_cast(Instr)->cases()) { + if (Case.getCaseSuccessor() == BB) { + return Case.getCaseValue(); + } + } + return nullptr; + } + +private: + /// Do a use-def chain traversal. Make sure the value of the switch variable + /// is always a known constant. This means that all conditional jumps based on + /// switch variable can be converted to unconditional jump. + bool isPredictable(const SwitchInst *SI) { + std::list Q; + std::set SeenValues; + SelectInsts.clear(); + + Value *FirstDef = SI->getOperand(0); + + if (!FirstDef->getType()->isIntegerTy()) + return false; + + auto *Inst = dyn_cast(FirstDef); + + // If this is a function argument or another non-instruction, then give up. + // We are interested in loop local variables. + if (!Inst) + return false; + + LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n"); + + Q.push_back(Inst); + SeenValues.insert(FirstDef); + + while (!Q.empty()) { + Instruction *Current = Q.front(); + Q.pop_front(); + + if (auto *Phi = dyn_cast(Current)) { + for (Value *Incoming : Phi->incoming_values()) { + if (!isValidValue(Incoming, SeenValues)) { + return false; + } + addInstToQueue(Incoming, Q, SeenValues); + } + LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n"); + } else if (SelectInst *SelI = dyn_cast(Current)) { + if (!isValidSelectInst(SelI)) { + return false; + } + if (!isValidValue(SelI->getTrueValue(), SeenValues) || + !isValidValue(SelI->getFalseValue(), SeenValues)) { + return false; + } + addInstToQueue(SelI->getTrueValue(), Q, SeenValues); + addInstToQueue(SelI->getFalseValue(), Q, SeenValues); + LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n"); + if (auto SelIUse = dyn_cast(SelI->user_back())) { + SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); + } + } else { + // If it is neither a phi nor a select, then we give up. + return false; + } + } + + return true; + } + + bool isValidValue(Value *InpVal, std::set &SeenValues) { + if (SeenValues.find(InpVal) != SeenValues.end()) + return true; + + if (isa(InpVal)) + return true; + + // If this is a function argument or another non-instruction, then give up. + Instruction *I = dyn_cast(InpVal); + if (I == nullptr) + return false; + + return true; + } + + void addInstToQueue(Value *Val, std::list &Q, + std::set &SeenValues) { + if (SeenValues.find(Val) != SeenValues.end()) + return; + if (Instruction *I = dyn_cast(Val)) { + Q.push_back(I); + } + SeenValues.insert(Val); + } + + bool isValidSelectInst(SelectInst *SI) { + // Vector select instructions are not supported. + if (!SI->getCondition()->getType()->isIntegerTy(1)) { + return false; + } + + if (!SI->hasOneUse()) { + return false; + } + + Instruction *SIUse = dyn_cast(SI->user_back()); + // The use of the select inst should be either a phi or another select. + if (!SIUse && !(isa(SIUse) || isa(SIUse))) { + return false; + } + + BasicBlock *SIBB = SI->getParent(); + + // Currently, we can only expand select instructions in basic blocks with + // one successor. + BranchInst *SITerm = dyn_cast(SIBB->getTerminator()); + if (!SITerm || !SITerm->isUnconditional()) { + return false; + } + + if (isa(SIUse) && SIBB->getSingleSuccessor() != + dyn_cast(SIUse)->getParent()) { + return false; + } + + // If select will not be sunk during unfolding, and it is in the same basic + // block as another state defining select, then cannot unfold both. + for (SelectInstToUnfold SIToUnfold : SelectInsts) { + SelectInst *PrevSI = SIToUnfold.getInst(); + if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && + PrevSI->getParent() == SI->getParent()) + return false; + } + + return true; + } + + const SwitchInst *SI; + const SwitchInst *Instr = nullptr; + std::vector SelectInsts; +}; + +struct AllSwitchPaths { + AllSwitchPaths(const MainSwitch *MSwitch) + : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), + MSwitch(MSwitch) {} + + void run() const { + VisitedBlocks Visited; + PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1); + StateDefMap StateDef = getStateDefMap(); + + std::list TPaths; + + for (PathType Path : LoopPaths) { + ThreadingPath TPath; + + const BasicBlock *PrevBB = nullptr; + for (const BasicBlock *BB : Path) { + if (MSwitch->isNonDefaultDest(BB) && !TPath.isEntryValueSet()) { + const ConstantInt *Val = MSwitch->getValueFor(BB); + TPath.setEntryValue(Val); + } + + if (TPath.isEntryValueSet() && StateDef.count(BB) != 0) { + const PHINode *Phi = dyn_cast(StateDef[BB]); + assert(Phi && "Expected a state-defining instr to be a phi node."); + + const Value *V = Phi->getIncomingValueForBlock(PrevBB); + if (const ConstantInt *C = dyn_cast(V)) { + TPath.setExitValue(C); + TPath.setDeterminator(BB); + TPath.setPath(Path); + } + } + + PrevBB = BB; + } + + // The next state may be defined in the switch block itself. If an exit + // value was not found, then check if the switch block is the determinator + if (!TPath.isExitValueSet() && StateDef.count(SwitchBlock) != 0) { + const PHINode *Phi = dyn_cast(StateDef[SwitchBlock]); + assert(Phi && "Expected a state-defining instr to be a phi node."); + + if (Phi->getBasicBlockIndex(PrevBB) != -1) { + const Value *V = Phi->getIncomingValueForBlock(PrevBB); + if (const ConstantInt *C = dyn_cast(V)) { + TPath.setExitValue(C); + TPath.setDeterminator(SwitchBlock); + TPath.setPath(Path); + } + } + } + + if (TPath.isExitValueSet()) + TPaths.push_back(TPath); + } + + for (const ThreadingPath &TPath : TPaths) { + LLVM_DEBUG(dbgs() << TPath << "\n"); + } + } + +private: + // Value: an instruction that defines a switch state; + // Key: the parent basic block of that instruction. + typedef std::unordered_map StateDefMap; + + PathsType paths(const BasicBlock *BB, VisitedBlocks &Visited, + int PathDepth) const { + PathsType Res; + + // Stop exploring paths after visiting MaxPathLength blocks + if (PathDepth > MaxPathDepth) + return Res; + + Visited.insert(BB); + + // Some blocks have multiple edges to the same successor, and this set + // is used to prevent a duplicate path from being generated + std::set Successors; + + for (const_succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; + ++SI) { + const BasicBlock *Succ = *SI; + + if (Successors.find(Succ) != Successors.end()) + continue; + Successors.insert(Succ); + + // Found a cycle through the SwitchBlock + if (Succ == SwitchBlock) { + Res.push_back({BB}); + continue; + } + + // We have encountered a cycle, do not get caught in it + if (Visited.find(Succ) != Visited.end()) + continue; + + PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1); + for (PathType Path : SuccPaths) { + PathType NewPath(Path); + NewPath.push_front(BB); + Res.push_back(NewPath); + } + } + // This block could now be visited again from a different predecessor. Note + // that this will result in exponential runtime. Subpaths could possibly be + // cached but it takes a lot of memory to store them. + Visited.erase(BB); + return Res; + } + + /// Walk the use-def chain and collect all the state-defining instructions. + StateDefMap getStateDefMap() const { + StateDefMap Res; + + Value *FirstDef = Switch->getOperand(0); + + assert(isa(FirstDef) && "After select unfolding, all state " + "definitions are expected to be phi " + "nodes."); + + std::list Q; + Q.push_back(dyn_cast(FirstDef)); + std::set SeenValues; + + while (!Q.empty()) { + PHINode *CurPhi = Q.front(); + Q.pop_front(); + + Res[CurPhi->getParent()] = CurPhi; + SeenValues.insert(CurPhi); + + for (Value *Incoming : CurPhi->incoming_values()) { + if (Incoming == FirstDef || isa(Incoming) || + SeenValues.find(Incoming) != SeenValues.end()) { + continue; + } + + assert(isa(Incoming) && "After select unfolding, all state " + "definitions are expected to be phi " + "nodes."); + + Q.push_back(dyn_cast(Incoming)); + } + } + + return Res; + } + + const SwitchInst *Switch; + const BasicBlock *SwitchBlock; + const MainSwitch *MSwitch; +}; + +bool DFAJumpThreading::runOnFunction(Function &F) { + if (skipFunction(F) || DisableThis) + return false; + + LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); + + for (Function::iterator B = F.begin(), BE = F.end(); B != BE; B++) { + if (auto *SI = dyn_cast(B->getTerminator())) { + + LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << B->getName() + << " is predictable\n"); + MainSwitch Switch(SI); + + if (!Switch.getInstr()) + continue; + + LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << B->getName() << " is a " + << "candidate for jump threading\n"); + LLVM_DEBUG(SI->dump()); + + unfoldSelectInstrs(Switch.getSelectInsts()); + + AllSwitchPaths SwitchPaths(&Switch); + SwitchPaths.run(); + } + } + + if (IsViewCFG) { + F.viewCFG(); + } + + verifyFunction(F, &dbgs()); + + return true; +} +} // end anonymous namespace Index: llvm/test/CodeGen/AArch64/dfa-jump-threading-paths.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/dfa-jump-threading-paths.ll @@ -0,0 +1,100 @@ +; REQUIRES: asserts +; RUN: opt -S -dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s + +define i32 @test1(i32 %num) { +; CHECK: < for.body case1 for.inc > [ 1, 2, for.inc ] +; CHECK-NEXT: < for.body case2 for.inc > [ 2, 1, for.inc ] +; CHECK-NEXT: < for.body case2 si.unfold.false for.inc > [ 2, 2, for.inc ] +entry: + br label %for.body + +for.body: + %count = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ] + switch i32 %state, label %for.inc [ + i32 1, label %case1 + i32 2, label %case2 + ] + +case1: + br label %for.inc + +case2: + %cmp = icmp eq i32 %count, 50 + %sel = select i1 %cmp, i32 1, i32 2 + br label %for.inc + +for.inc: + %state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ] + %inc = add nsw i32 %count, 1 + %cmp.exit = icmp slt i32 %inc, %num + br i1 %cmp.exit, label %for.body, label %for.end + +for.end: + ret i32 0 +} + +define i32 @test2(i32 %init) { +; CHECK: < loop.3 case2 > [ 2, 3, loop.3 ] +; CHECK-NEXT: < loop.3 case2 loop.1.backedge loop.1 loop.2 > [ 2, 1, loop.1 ] +; CHECK-NEXT: < loop.3 case2 loop.1.backedge si.unfold.false1 loop.1 loop.2 > [ 2, 4, loop.1.backedge ] +; CHECK-NEXT: < loop.3 case3 loop.2.backedge loop.2 > [ 3, 0, loop.2.backedge ] +; CHECK-NEXT: < loop.3 case3 case4 loop.2.backedge loop.2 > [ 3, 3, loop.2.backedge ] +; CHECK-NEXT: < loop.3 case3 case4 loop.1.backedge loop.1 loop.2 > [ 3, 1, loop.1 ] +; CHECK-NEXT: < loop.3 case3 case4 loop.1.backedge si.unfold.false1 loop.1 loop.2 > [ 3, 2, loop.1.backedge ] +; CHECK-NEXT: < loop.3 case4 loop.2.backedge loop.2 > [ 4, 3, loop.2.backedge ] +; CHECK-NEXT: < loop.3 case4 loop.1.backedge loop.1 loop.2 > [ 4, 1, loop.1 ] +; CHECK-NEXT: < loop.3 case4 loop.1.backedge si.unfold.false1 loop.1 loop.2 > [ 4, 2, loop.1.backedge ] +entry: + %cmp = icmp eq i32 %init, 0 + %sel = select i1 %cmp, i32 0, i32 2 + br label %loop.1 + +loop.1: + %state.1 = phi i32 [ %sel, %entry ], [ %state.1.be2, %loop.1.backedge ] + br label %loop.2 + +loop.2: + %state.2 = phi i32 [ %state.1, %loop.1 ], [ %state.2.be, %loop.2.backedge ] + br label %loop.3 + +loop.3: + %state = phi i32 [ %state.2, %loop.2 ], [ 3, %case2 ] + switch i32 %state, label %infloop.i [ + i32 2, label %case2 + i32 3, label %case3 + i32 4, label %case4 + i32 0, label %case0 + i32 1, label %case1 + ] + +case2: + br i1 %cmp, label %loop.3, label %loop.1.backedge + +case3: + br i1 %cmp, label %loop.2.backedge, label %case4 + +case4: + br i1 %cmp, label %loop.2.backedge, label %loop.1.backedge + +loop.1.backedge: + %state.1.be = phi i32 [ 2, %case4 ], [ 4, %case2 ] + %state.1.be2 = select i1 %cmp, i32 1, i32 %state.1.be + br label %loop.1 + +loop.2.backedge: + %state.2.be = phi i32 [ 3, %case4 ], [ 0, %case3 ] + br label %loop.2 + +case0: + br label %exit + +case1: + br label %exit + +infloop.i: + br label %infloop.i + +exit: + ret i32 0 +} Index: llvm/test/CodeGen/AArch64/dfa-unfold-select.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/dfa-unfold-select.ll @@ -0,0 +1,149 @@ +; RUN: opt -S -dfa-jump-threading %s | FileCheck %s + +define i32 @test1(i32 %num) { +entry: + br label %for.body + +for.body: + %count = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ] + switch i32 %state, label %for.inc [ + i32 1, label %case1 + i32 2, label %case2 + ] + +case1: + br label %for.inc + +case2: +; CHECK: %cmp = icmp +; CHECK-NOT: %sel = select i1 %cmp, i32 1, i32 2 +; CHECK-NEXT: br i1 %cmp, label %for.inc, label %si.unfold.false + %cmp = icmp slt i32 %count, 50 + %sel = select i1 %cmp, i32 1, i32 2 + br label %for.inc + +; CHECK: si.unfold.false +; CHECK-NEXT: br label %for.inc + +for.inc: +; CHECK: %state.next = phi +; CHECK-NOT: [ %sel, %case2 ] +; CHECK: [ 1, %case2 ] +; CHECK: [ 2, %si.unfold.false ] + %state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ] + %inc = add nsw i32 %count, 1 + %cmp.exit = icmp slt i32 %inc, %num + br i1 %cmp.exit, label %for.body, label %for.end + +for.end: + ret i32 0 +} + +define i32 @test2(i32 %num) { +entry: + br label %for.body + +for.body: + %count = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ] + switch i32 %state, label %for.inc [ + i32 1, label %case1 + i32 2, label %case2 + ] + +case1: +; CHECK: %cmp.c1 = icmp +; CHECK: %cmp2.c1 = icmp +; CHECK-NOT: select i1 +; CHECK-NEXT: br i1 %cmp2.c1, label %si.unfold.true, label %for.inc + %cmp.c1 = icmp slt i32 %count, 50 + %cmp2.c1 = icmp slt i32 %count, 100 + %state1.1 = select i1 %cmp.c1, i32 1, i32 2 + %state1.2 = select i1 %cmp2.c1, i32 %state1.1, i32 3 + br label %for.inc + +case2: +; CHECK: %cmp.c2 = icmp +; CHECK: %cmp2.c2 = icmp +; CHECK-NOT: select i1 +; CHECK-NEXT: br i1 %cmp2.c2, label %for.inc, label %si.unfold.false + %cmp.c2 = icmp slt i32 %count, 50 + %cmp2.c2 = icmp sgt i32 %count, 100 + %state2.1 = select i1 %cmp.c2, i32 1, i32 2 + %state2.2 = select i1 %cmp2.c2, i32 3, i32 %state2.1 + br label %for.inc + +; CHECK: si.unfold.true +; CHECK-NEXT: br i1 %cmp.c1, label %for.inc, label %si.unfold.false1 +; CHECK: si.unfold.false +; CHECK-NEXT: br i1 %cmp.c2, label %for.inc, label %si.unfold.false2 +; CHECK: si.unfold.false1 +; CHECK-NEXT: br label %for.inc +; CHECK: si.unfold.false2 +; CHECK-NEXT: br label %for.inc + +for.inc: +; CHECK: %state.next = phi +; CHECK-NOT: [ %state1.2, %case1 ] +; CHECK-NOT: [ %state2.2, %case2 ] +; CHECK: [ 3, %case1 ], [ 3, %case2 ] +; CHECK: [ 1, %si.unfold.true ], [ 1, %si.unfold.false ], [ 2, %si.unfold.false1 ], [ 2, %si.unfold.false2 ] + %state.next = phi i32 [ %state1.2, %case1 ], [ %state2.2, %case2 ], [ 1, %for.body ] + %inc = add nsw i32 %count, 1 + %cmp.exit = icmp slt i32 %inc, %num + br i1 %cmp.exit, label %for.body, label %for.end + +for.end: + ret i32 0 +} + +define i32 @test3(i32 %num) { +entry: + br label %for.body + +for.body: + %count = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ] + switch i32 %state, label %for.inc [ + i32 1, label %case1 + i32 2, label %case2 + ] + +case1: + br label %for.inc + +case2: +; CHECK: %cmp.3 = icmp eq i32 %0, 0 +; CHECK-NOT: select i1 +; CHECK-NEXT: br i1 %cmp.3, label %si.unfold.true, label %si.unfold.false + %cmp.1 = icmp slt i32 %count, 50 + %cmp.2 = icmp slt i32 %count, 100 + %0 = and i32 %count, 1 + %cmp.3 = icmp eq i32 %0, 0 + %sel.1 = select i1 %cmp.1, i32 1, i32 2 + %sel.2 = select i1 %cmp.2, i32 3, i32 4 + %sel.3 = select i1 %cmp.3, i32 %sel.1, i32 %sel.2 + br label %for.inc + +; CHECK: si.unfold.true +; CHECK-NEXT: br i1 %cmp.1, label %for.inc, label %si.unfold.false1 +; CHECK: si.unfold.false +; CHECK-NEXT: br i1 %cmp.2, label %for.inc, label %si.unfold.false2 +; CHECK: si.unfold.false1 +; CHECK-NEXT: br label %for.inc +; CHECK: si.unfold.false2 +; CHECK-NEXT: br label %for.inc + +for.inc: +; CHECK: %state.next = phi +; CHECK-NOT: %case2 +; CHECK: [ 1, %si.unfold.true ], [ 3, %si.unfold.false ], [ 2, %si.unfold.false1 ], [ 4, %si.unfold.false2 ] + %state.next = phi i32 [ %sel.3, %case2 ], [ 1, %for.body ], [ 2, %case1 ] + %inc = add nsw i32 %count, 1 + %cmp.exit = icmp slt i32 %inc, %num + br i1 %cmp.exit, label %for.body, label %for.end + +for.end: + ret i32 0 +}