Index: include/llvm/Transforms/Utils/Local.h
===================================================================
--- include/llvm/Transforms/Utils/Local.h
+++ include/llvm/Transforms/Utils/Local.h
@@ -137,7 +137,8 @@
 /// the basic block that was pointed to.
 ///
 bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
-                 unsigned BonusInstThreshold, AssumptionCache *AC = nullptr);
+                 unsigned BonusInstThreshold, AssumptionCache *AC = nullptr,
+                 AliasAnalysis *AA = nullptr);
 
 /// FlatternCFG - This function is used to flatten a CFG. For
 /// example, it uses parallel-and and parallel-or mode to collapse
Index: lib/Transforms/Scalar/SimplifyCFGPass.cpp
===================================================================
--- lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -128,7 +129,7 @@
 /// Call SimplifyCFG on all the blocks in the function,
 /// iterating until no more changes are made.
 static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
-                                   AssumptionCache *AC,
+                                   AssumptionCache *AC, AliasAnalysis *AA,
                                    unsigned BonusInstThreshold) {
   bool Changed = false;
   bool LocalChange = true;
@@ -137,7 +138,7 @@
 
     // Loop over all of the basic blocks and remove them if they are unneeded.
     for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
-      if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
+      if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC, AA)) {
         LocalChange = true;
         ++NumSimpl;
       }
@@ -148,10 +149,11 @@
 }
 
 static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
-                                AssumptionCache *AC, int BonusInstThreshold) {
+                                AssumptionCache *AC, AliasAnalysis *AA,
+                                int BonusInstThreshold) {
   bool EverChanged = removeUnreachableBlocks(F);
   EverChanged |= mergeEmptyReturnBlocks(F);
-  EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
+  EverChanged |= iterativelySimplifyCFG(F, TTI, AC, AA, BonusInstThreshold);
 
   // If neither pass changed anything, we're done.
   if (!EverChanged) return false;
@@ -165,7 +167,7 @@
     return true;
 
   do {
-    EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
+    EverChanged = iterativelySimplifyCFG(F, TTI, AC, AA, BonusInstThreshold);
     EverChanged |= removeUnreachableBlocks(F);
   } while (EverChanged);
 
@@ -183,7 +185,8 @@
   auto &TTI = AM->getResult<TargetIRAnalysis>(F);
   auto &AC = AM->getResult<AssumptionAnalysis>(F);
 
-  if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold))
+  // FIXME: How do we query alias analysis with the new pass manager?
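For what it's worth, later LLVM releases expose alias analysis to new-pass-manager passes through the AAManager function analysis, so the FIXME above could plausibly be resolved along these lines once that facility is available (a sketch under that assumption, not part of this patch):

    AliasAnalysis *AA = &AM->getResult<AAManager>(F);
    if (!simplifyFunctionCFG(F, TTI, &AC, AA, BonusInstThreshold))
      return PreservedAnalyses::none();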
+  if (!simplifyFunctionCFG(F, TTI, &AC, nullptr, BonusInstThreshold))
     return PreservedAnalyses::none();
 
   return PreservedAnalyses::all();
@@ -212,11 +215,13 @@
         &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
     const TargetTransformInfo &TTI =
         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-    return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold);
+    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+    return simplifyFunctionCFG(F, TTI, AC, AA, BonusInstThreshold);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
+    AU.addRequired<AAResultsWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
   }
@@ -227,6 +232,7 @@
 INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
                       false)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
                     false)
Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -18,6 +18,8 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -73,6 +75,17 @@
     "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
     cl::desc("Hoist conditional stores if an unconditional store precedes"));
 
+static cl::opt<bool> MergeCondStores(
+    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
+    cl::desc("Hoist conditional stores even if an unconditional store does not "
+             "precede - hoist multiple conditional stores into a single "
+             "predicated store"));
+
+static cl::opt<bool> MergeCondStoresAggressively(
+    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
+    cl::desc("When merging conditional stores, do so even if the resultant "
+             "basic blocks are unlikely to be if-converted as a result"));
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
@@ -113,6 +126,7 @@
   const DataLayout &DL;
   unsigned BonusInstThreshold;
   AssumptionCache *AC;
+  AliasAnalysis *AA;
   Value *isValueEqualityComparison(TerminatorInst *TI);
   BasicBlock *GetValueEqualityComparisonCases(
       TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases);
@@ -133,8 +147,10 @@
 public:
   SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
-                 unsigned BonusInstThreshold, AssumptionCache *AC)
-      : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
+                 unsigned BonusInstThreshold, AssumptionCache *AC,
+                 AliasAnalysis *AA)
+      : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC),
+        AA(AA) {}
   bool run(BasicBlock *BB);
 };
 }
@@ -2332,11 +2348,268 @@
   return false;
 }
 
+static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
+                                           BasicBlock *QTB, BasicBlock *QFB,
+                                           BasicBlock *PostBB, Value *Address,
+                                           bool SwapPCond, bool SwapQCond,
+                                           AliasAnalysis *AA) {
+  // For every pointer, there must be exactly two stores, one coming from
+  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
+  // store (to any address) in PTB,PFB or QTB,QFB.
+  // FIXME: We could relax this restriction with a bit more work and
+  // performance testing.
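The two loops that follow both implement the same query: find the unique store to Address inside one of two candidate blocks, giving up if more than one is seen. As a standalone sketch (the helper name findUniqueStoreTo is illustrative, not part of the patch), the query looks like this:

    // Illustrative helper: return the single store to Address located in
    // either BB1 or BB2, or nullptr if there is no such store or more than
    // one. Either block may be null (e.g. a missing "true" block).
    static StoreInst *findUniqueStoreTo(Value *Address, BasicBlock *BB1,
                                        BasicBlock *BB2) {
      StoreInst *Found = nullptr;
      for (User *U : Address->users())
        if (auto *SI = dyn_cast<StoreInst>(U))
          if (SI->getParent() == BB1 || SI->getParent() == BB2) {
            if (Found)
              return nullptr; // Multiple stores seen.
            Found = SI;
          }
      return Found;
    }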
+  StoreInst *PStore = nullptr;
+  for (User *U : Address->users())
+    if (auto *SI = dyn_cast<StoreInst>(U))
+      if (SI->getParent() == PTB || SI->getParent() == PFB) {
+        if (PStore)
+          // Multiple stores seen.
+          return false;
+        else
+          PStore = SI;
+      }
+
+  StoreInst *QStore = nullptr;
+  for (User *U : Address->users())
+    if (auto *SI = dyn_cast<StoreInst>(U))
+      if (SI->getParent() == QTB || SI->getParent() == QFB) {
+        if (QStore)
+          // Multiple stores seen.
+          return false;
+        else
+          QStore = SI;
+      }
+  assert(QStore && PStore && "How did we manage this?!");
+
+  // Now check the stores are compatible.
+  if (QStore->getType() != PStore->getType() ||
+      !QStore->isUnordered() || !PStore->isUnordered())
+    return false;
+
+  // If we're not in aggressive mode, we only optimize if we have some
+  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
+  if (!MergeCondStoresAggressively &&
+      PStore->getParent()->size() - 1 > PHINodeFoldingThreshold &&
+      QStore->getParent()->size() - 1 > PHINodeFoldingThreshold)
+    return false;
+
+  // Check that sinking the store won't cause program behavior changes. Sinking
+  // the store out of the Q blocks won't change any behavior as we're sinking
+  // from a block to its unconditional successor. But we're moving a store from
+  // the P blocks down through the middle block (QBI) and past both QFB and
+  // QTB. So we need to check that there are no may/must alias loads or stores
+  // in QBI, QTB and QFB. We also need to check there are no conflicting memory
+  // operations between PStore and the end of its parent block.
+  auto IsNoAlias = [&AA, &Address](Instruction *I) {
+    if (auto *LI = dyn_cast<LoadInst>(I))
+      return AA->isNoAlias(LI->getPointerOperand(), Address);
+    if (auto *SI = dyn_cast<StoreInst>(I))
+      return AA->isNoAlias(SI->getPointerOperand(), Address);
+    return !isa<CallInst>(I) && !isa<InvokeInst>(I);
+  };
+
+  for (auto &I : *QFB->getSinglePredecessor())
+    if (!IsNoAlias(&I))
+      return false;
+  for (auto &I : *QFB)
+    if (&I != QStore && !IsNoAlias(&I))
+      return false;
+  if (QTB)
+    for (auto &I : *QTB)
+      if (&I != QStore && !IsNoAlias(&I))
+        return false;
+  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
+       I != E; ++I)
+    if (&*I != PStore && !IsNoAlias(&*I))
+      return false;
+
+  // OK, we're going to sink the stores to PostBB. The store has to be
+  // conditional though, so first create the predicate.
+  Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
+                     ->getCondition();
+  Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
+                     ->getCondition();
+
+  IRBuilder<> PB(QFB->getSinglePredecessor()->getFirstInsertionPt());
+  if (!PTB)
+    PTB = PFB->getSinglePredecessor();
+
+  Type *Ty = QStore->getValueOperand()->getType();
+
+  // PPHI is going to be a PHI node that allows the value that was going to be
+  // stored in PStore to be referenced from PostBB.
+  //
+  // The only value we care about in PPHI is PStore. It doesn't matter to us
+  // what the other operand is (it'll never get used). We could just create a
+  // new PHI with an undef incoming value, but that could increase register
+  // pressure if EarlyCSE/InstCombine can't fold it with some other PHI. So
+  // here we directly look for some PHI in QBI's block with PStore as an
+  // incoming operand. If we find one, we use it, else we create a new one.
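The reuse-or-create idiom described in the comment above is used twice below, once for PPHI and again for QPHI. Pulled out on its own it would look roughly like this (the helper name is hypothetical and not part of the patch):

    // Hypothetical helper: reuse a PHI in BB that already receives V from
    // Pred, otherwise create a fresh two-operand PHI seeded with that edge;
    // the caller fills in the remaining incoming value. Assumes Pred is a
    // predecessor of BB, as it is at both call sites below.
    static PHINode *findOrCreateIncomingPHI(BasicBlock *BB, BasicBlock *Pred,
                                            Value *V, Type *Ty,
                                            IRBuilder<> &B) {
      for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(&*I); ++I)
        if (cast<PHINode>(&*I)->getIncomingValueForBlock(Pred) == V)
          return cast<PHINode>(&*I); // Reuse: avoids another undef-fed PHI.
      PHINode *PN = B.CreatePHI(Ty, 2);
      PN->addIncoming(V, Pred);
      return PN;
    }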
+  PHINode *PPHI = nullptr;
+
+  for (auto I = QFB->getSinglePredecessor()->begin(); isa<PHINode>(I); ++I)
+    if (cast<PHINode>(I)->getIncomingValueForBlock(PStore->getParent()) ==
+        PStore->getValueOperand())
+      PPHI = cast<PHINode>(I);
+
+  if (!PPHI) {
+    PPHI = PB.CreatePHI(Ty, 2);
+    PPHI->addIncoming(PStore->getValueOperand(),
+                      PStore->getParent() == PTB ? PTB : PFB);
+    PPHI->addIncoming(UndefValue::get(Ty),
+                      PStore->getParent() == PTB ? PFB : PTB);
+  }
+
+  IRBuilder<> QB(PostBB->getFirstInsertionPt());
+
+  // At this point, it's useful to redefine QTB if it's nullptr.
+  if (!QTB)
+    QTB = QFB->getSinglePredecessor();
+
+  // This is exactly the same logic as for PPHI, above.
+  PHINode *QPHI = nullptr;
+  for (auto I = PostBB->begin(); isa<PHINode>(I); ++I)
+    if (cast<PHINode>(I)->getIncomingValueForBlock(QStore->getParent()) ==
+        QStore->getValueOperand())
+      QPHI = cast<PHINode>(I);
+
+  if (!QPHI) {
+    QPHI = QB.CreatePHI(Ty, 2);
+    QPHI->addIncoming(QStore->getValueOperand(),
+                      QStore->getParent() == QTB ? QTB : QFB);
+    QPHI->addIncoming(PPHI,
+                      QStore->getParent() == QTB ? QFB : QTB);
+  }
+
+  Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
+  Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
+
+  if (SwapPCond)
+    PPred = QB.CreateNot(PPred);
+  if (SwapQCond)
+    QPred = QB.CreateNot(QPred);
+  Value *CombinedPred = QB.CreateOr(PPred, QPred);
+
+  // FIXME: can use QB.getInsertPoint below
+  auto *T =
+      SplitBlockAndInsertIfThen(CombinedPred, QB.GetInsertPoint(), false);
+  QB.SetInsertPoint(T);
+  QB.CreateStore(QPHI, Address);
+
+  QStore->eraseFromParent();
+  PStore->eraseFromParent();
+
+  return true;
+}
+
+static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
+                                   AliasAnalysis *AA) {
+  // The intention here is to find diamonds or triangles (see below) where each
+  // conditional block contains a store to the same address. Both of these
+  // stores are conditional, so they can't be unconditionally sunk. But it may
+  // be profitable to speculatively sink the stores into one merged store at
+  // the end, and predicate the merged store on the union of the two conditions
+  // of PBI and QBI.
+  //
+  // This can reduce the number of stores executed if both of the conditions
+  // are true, and can allow the blocks to become small enough to be
+  // if-converted. This optimization will also chain, so that ladders of
+  // test-and-set sequences can be if-converted away.
+  //
+  // We only deal with simple diamonds or triangles:
+  //
+  //     PBI       or      PBI        or a combination of the two
+  //    /   \               | \
+  //   PTB  PFB             |  PFB
+  //    \   /               | /
+  //     QBI                QBI
+  //    /  \                | \
+  //   QTB  QFB             |  QFB
+  //    \  /                | /
+  //    PostBB            PostBB
+  //
+  // We model triangles as a type of diamond with a nullptr "true" block.
+  // Triangles are canonicalized so that the fallthrough edge is represented by
+  // a true condition, as in the diagram above.
+  //
+  if (!AA)
+    // We cannot continue without alias analysis.
+    return false;
+
+  BasicBlock *PTB = PBI->getSuccessor(0);
+  BasicBlock *PFB = PBI->getSuccessor(1);
+  BasicBlock *QTB = QBI->getSuccessor(0);
+  BasicBlock *QFB = QBI->getSuccessor(1);
+  BasicBlock *PostBB = QFB->getSingleSuccessor();
+
+  bool SwapPCond = false, SwapQCond = false;
+  // Canonicalize fallthroughs to the true branches.
+  if (PFB == QBI->getParent()) {
+    std::swap(PFB, PTB);
+    SwapPCond = true;
+  }
+  if (QFB == PostBB) {
+    std::swap(QFB, QTB);
+    SwapQCond = true;
+  }
+
+  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
+  // and QFB may not. Model fallthroughs as a nullptr block.
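To make the shape described in the comment at the top of this function concrete, here is a C-level picture of the simplest case it targets; it mirrors the @test_simple function in the new test file below and is purely illustrative, not part of the patch:

    // Before: two independently conditional stores to the same address.
    void before(int *p, int a, int b) {
      if (a != 0)
        *p = 0;
      if (b != 0)
        *p = 1;
    }

    // After merging: one store, predicated on the union of the conditions.
    // The second value wins when both conditions hold, matching the original.
    void after(int *p, int a, int b) {
      if (a != 0 || b != 0)
        *p = (b != 0) ? 1 : 0;
    }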
+  if (PTB == QBI->getParent())
+    PTB = nullptr;
+  if (QTB == PostBB)
+    QTB = nullptr;
+
+  if (!PostBB ||
+      // PFB must point to QBI and only have one predecessor
+      !PFB || PFB->getSingleSuccessor() != QBI->getParent() ||
+      !PFB->getSinglePredecessor() ||
+      // QFB must point to PostBB and only have one predecessor
+      !QFB || QFB->getSingleSuccessor() != PostBB ||
+      !QFB->getSinglePredecessor() ||
+      // If PTB exists, it must lead to QBI
+      (PTB && PTB->getSingleSuccessor() != QBI->getParent()) ||
+      // If QTB exists, it must lead to PostBB
+      (QTB && QTB->getSingleSuccessor() != PostBB) ||
+      // PostBB and QBI must both have only two predecessors
+      PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2)
+    return false;
+
+  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
+  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
+  if (PTB)
+    for (auto &I : *PTB)
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+        PStoreAddresses.insert(SI->getPointerOperand());
+  for (auto &I : *PFB)
+    if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+      PStoreAddresses.insert(SI->getPointerOperand());
+
+  if (QTB)
+    for (auto &I : *QTB)
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+        QStoreAddresses.insert(SI->getPointerOperand());
+  for (auto &I : *QFB)
+    if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+      QStoreAddresses.insert(SI->getPointerOperand());
+
+  SmallVector<Value *, 4> CommonAddresses;
+  std::set_intersection(PStoreAddresses.begin(), PStoreAddresses.end(),
+                        QStoreAddresses.begin(), QStoreAddresses.end(),
+                        std::back_inserter(CommonAddresses));
+
+  bool Changed = false;
+  for (auto *Address : CommonAddresses)
+    Changed |= mergeConditionalStoreToAddress(
+        PTB, PFB, QTB, QFB, PostBB, Address, SwapPCond, SwapQCond, AA);
+  return Changed;
+}
+
 /// If we have a conditional branch as a predecessor of another block,
 /// this function tries to simplify it. We know
 /// that PBI and BI are both conditional branches, and BI is in one of the
 /// successor blocks of PBI - PBI branches to BI.
-static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+                                           AliasAnalysis *AA) {
   assert(PBI->isConditional() && BI->isConditional());
 
   BasicBlock *BB = BI->getParent();
@@ -2385,6 +2658,11 @@
     }
   }
 
+  // If both branches are conditional and both contain stores to the same
+  // address, remove the stores from the conditionals and create a conditional
+  // merged store at the end.
+  if (MergeCondStores && mergeConditionalStores(PBI, BI, AA))
+    return true;
+
   // If this is a conditional branch in an empty block, and if any
   // predecessors are a conditional branch to one of our destinations,
   // fold the conditions into logical ops and one cond br.
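The chaining mentioned earlier (ladders of test-and-set sequences) is exercised by the @test_recursive function in the new test file; at the C level the input looks like the sketch below, and repeated application of the merge collapses all four conditional stores into a single predicated store (illustrative only, not part of the patch):

    void ladder(int *p, int a, int b, int c, int d) {
      if (a != 0) *p = 0;
      if (b != 0) *p = 1;
      if (c != 0) *p = 2;
      if (d != 0) *p = 3;
    }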
@@ -2963,7 +3241,7 @@
        IE = UnwindDest->getFirstNonPHI()->getIterator();
        I != IE; ++I) {
     PHINode *DestPN = cast<PHINode>(I);
-
+
     int Idx = DestPN->getBasicBlockIndex(BB);
     // Since BB unwinds to UnwindDest, it has to be in the PHI node.
     assert(Idx != -1);
@@ -2988,7 +3266,7 @@
       // If the incoming value was a PHI node in the cleanup pad we are
       // removing, we need to merge that PHI node's incoming values into
       // DestPN.
-      for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues(); 
+      for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues();
            SrcIdx != SrcE; ++SrcIdx) {
         DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx),
                             SrcPN->getIncomingBlock(SrcIdx));
@@ -3341,17 +3619,17 @@
     }
   }
 
-  // If we can prove that the cases must cover all possible values, the 
-  // default destination becomes dead and we can remove it.  If we know some
+  // If we can prove that the cases must cover all possible values, the
+  // default destination becomes dead and we can remove it. If we know some
   // of the bits in the value, we can use that to more precisely compute the
   // number of possible unique case values.
   bool HasDefault =
       !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
-  const unsigned NumUnknownBits = Bits - 
+  const unsigned NumUnknownBits = Bits -
       (KnownZero.Or(KnownOne)).countPopulation();
   assert(NumUnknownBits <= Bits);
   if (HasDefault && DeadCases.empty() &&
-      NumUnknownBits < 64 /* avoid overflow */ && 
+      NumUnknownBits < 64 /* avoid overflow */ &&
       SI->getNumCases() == (1ULL << NumUnknownBits)) {
     DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
     BasicBlock *NewDefault = SplitBlockPredecessors(SI->getDefaultDest(),
@@ -4107,7 +4385,7 @@
     assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
            "Expect true or false as compare result.");
   }
-
+
   // Check if the branch instruction dominates the phi node. It's a simple
   // dominance check, but sufficient for our needs.
   // Although this check is invariant in the calling loops, it's better to do it
@@ -4572,6 +4850,16 @@
   return false;
 }
 
+static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
+  BasicBlock *PredPred = nullptr;
+  for (auto *P : predecessors(BB)) {
+    BasicBlock *PPred = P->getSinglePredecessor();
+    if (!PPred || (PredPred && PredPred != PPred))
+      return nullptr;
+    PredPred = PPred;
+  }
+  return PredPred;
+}
 
 bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   BasicBlock *BB = BI->getParent();
@@ -4652,9 +4940,17 @@
   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
     if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
       if (PBI != BI && PBI->isConditional())
-        if (SimplifyCondBranchToCondBranch(PBI, BI))
+        if (SimplifyCondBranchToCondBranch(PBI, BI, AA))
           return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
 
+  // Look for diamond patterns.
+  if (MergeCondStores)
+    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
+      if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
+        if (PBI != BI && PBI->isConditional())
+          if (mergeConditionalStores(PBI, BI, AA))
+            return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
   return false;
 }
 
@@ -4800,7 +5096,9 @@
 /// of the CFG. It returns true if a modification was made.
 ///
 bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
-                       unsigned BonusInstThreshold, AssumptionCache *AC) {
+                       unsigned BonusInstThreshold, AssumptionCache *AC,
+                       AliasAnalysis *AA) {
   return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
-                        BonusInstThreshold, AC).run(BB);
+                        BonusInstThreshold, AC, AA)
+      .run(BB);
 }
Index: test/Transforms/SimplifyCFG/merge-cond-stores.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -0,0 +1,279 @@
+; RUN: opt -simplifycfg -instcombine < %s -simplifycfg-merge-cond-stores=true -simplifycfg-merge-cond-stores-aggressively=false -phi-node-folding-threshold=2 -S | FileCheck %s
+
+; CHECK-LABEL: @test_simple
+; This test should succeed and end up if-converted.
+; CHECK: icmp eq i32 %b, 0
+; CHECK-NEXT: icmp ne i32 %a, 0
+; CHECK-NEXT: xor i1 %x2, true
+; CHECK-NEXT: %[[x:.*]] = or i1 %{{.*}}, %{{.*}}
+; CHECK-NEXT: br i1 %[[x]]
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define void @test_simple(i32* %p, i32 %a, i32 %b) {
+entry:
+  %x1 = icmp eq i32 %a, 0
+  br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+  store i32 0, i32* %p
+  br label %fallthrough
+
+fallthrough:
+  %x2 = icmp eq i32 %b, 0
+  br i1 %x2, label %end, label %yes2
+
+yes2:
+  store i32 1, i32* %p
+  br label %end
+
+end:
+  ret void
+}
+
+; CHECK-LABEL: @test_recursive
+; This test should entirely fold away, leaving one large basic block.
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+  %x1 = icmp eq i32 %a, 0
+  br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+  store i32 0, i32* %p
+  br label %fallthrough
+
+fallthrough:
+  %x2 = icmp eq i32 %b, 0
+  br i1 %x2, label %next, label %yes2
+
+yes2:
+  store i32 1, i32* %p
+  br label %next
+
+next:
+  %x3 = icmp eq i32 %c, 0
+  br i1 %x3, label %fallthrough2, label %yes3
+
+yes3:
+  store i32 2, i32* %p
+  br label %fallthrough2
+
+fallthrough2:
+  %x4 = icmp eq i32 %d, 0
+  br i1 %x4, label %end, label %yes4
+
+yes4:
+  store i32 3, i32* %p
+  br label %end
+
+end:
+  ret void
+}
+
+; CHECK-LABEL: @test_not_ifconverted
+; The code in each diamond is too large - it won't be if-converted so our
+; heuristics should say no.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define void @test_not_ifconverted(i32* %p, i32 %a, i32 %b) {
+entry:
+  %x1 = icmp eq i32 %a, 0
+  br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+  %y1 = or i32 %b, 55
+  %y2 = add i32 %y1, 24
+  %y3 = and i32 %y2, 67
+  store i32 %y3, i32* %p
+  br label %fallthrough
+
+fallthrough:
+  %x2 = icmp eq i32 %b, 0
+  br i1 %x2, label %end, label %yes2
+
+yes2:
+  %z1 = or i32 %a, 55
+  %z2 = add i32 %z1, 24
+  %z3 = and i32 %z2, 67
+  store i32 %z3, i32* %p
+  br label %end
+
+end:
+  ret void
+}
+
+; CHECK-LABEL: @test_aliasing1
+; The load from %p aliases the store in %yes1, so if-converting this would
+; be illegal.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define void @test_aliasing1(i32* %p, i32 %a, i32 %b) {
+entry:
+  %x1 = icmp eq i32 %a, 0
+  br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+  store i32 0, i32* %p
+  br label %fallthrough
+
+fallthrough:
+  %y1 = load i32, i32* %p
+  %x2 = icmp eq i32 %y1, 0
+  br i1 %x2, label %end, label %yes2
+
+yes2:
+  store i32 1, i32* %p
+  br label %end
+
+end:
+  ret void
+}
+
+; CHECK-LABEL: @test_aliasing2
+; The load from %q aliases with %p, so if-converting this would be illegal.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define void @test_aliasing2(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+  %x1 = icmp eq i32 %a, 0
+  br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+  store i32 0, i32* %p
+  br label %fallthrough
+
+fallthrough:
+  %y1 = load i32, i32* %q
+  %x2 = icmp eq i32 %y1, 0
+  br i1 %x2, label %end, label %yes2
+
+yes2:
+  store i32 1, i32* %p
+  br label %end
+
+end:
+  ret void
+}
+
+declare void @f()
+
+; CHECK-LABEL: @test_diamond_simple
+; This should get if-converted.
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test_diamond_simple(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+  %x1 = icmp eq i32 %a, 0
+  br i1 %x1, label %no1, label %yes1
+
+yes1:
+  store i32 0, i32* %p
+  br label %fallthrough
+
+no1:
+  %z1 = add i32 %a, %b
+  br label %fallthrough
+
+fallthrough:
+  %z2 = phi i32 [ %z1, %no1 ], [ 0, %yes1 ]
+  %x2 = icmp eq i32 %b, 0
+  br i1 %x2, label %no2, label %yes2
+
+yes2:
+  store i32 1, i32* %p
+  br label %end
+
+no2:
+  %z3 = sub i32 %z2, %b
+  br label %end
+
+end:
+  %z4 = phi i32 [ %z3, %no2 ], [ 3, %yes2 ]
+  ret i32 %z4
+}
+
+; CHECK-LABEL: @test_diamond_simple2
+; This should get if-converted. The call to f() occurs in the first branch,
+; so it does not affect the if-conversion.
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test_diamond_simple2(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+  %x1 = icmp eq i32 %a, 0
+  br i1 %x1, label %no1, label %yes1
+
+yes1:
+  store i32 0, i32* %p
+  br label %fallthrough
+
+no1:
+  call void @f()
+  %z1 = add i32 %a, %b
+  br label %fallthrough
+
+fallthrough:
+  %z2 = phi i32 [ %z1, %no1 ], [ 0, %yes1 ]
+  %x2 = icmp eq i32 %b, 0
+  br i1 %x2, label %no2, label %yes2
+
+yes2:
+  store i32 1, i32* %p
+  br label %end
+
+no2:
+  %z3 = sub i32 %z2, %b
+  br label %end
+
+end:
+  %z4 = phi i32 [ %z3, %no2 ], [ 3, %yes2 ]
+  ret i32 %z4
+}
+
+; CHECK-LABEL: @test_diamond_alias3
+; Now there is a call to f() in the bottom branch. The store in the first
+; branch would now be reordered with respect to the call if we if-converted,
+; so we must not.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define i32 @test_diamond_alias3(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+  %x1 = icmp eq i32 %a, 0
+  br i1 %x1, label %no1, label %yes1
+
+yes1:
+  store i32 0, i32* %p
+  br label %fallthrough
+
+no1:
+  call void @f()
+  %z1 = add i32 %a, %b
+  br label %fallthrough
+
+fallthrough:
+  %z2 = phi i32 [ %z1, %no1 ], [ 0, %yes1 ]
+  %x2 = icmp eq i32 %b, 0
+  br i1 %x2, label %no2, label %yes2
+
+yes2:
+  store i32 1, i32* %p
+  br label %end
+
+no2:
+  call void @f()
+  %z3 = sub i32 %z2, %b
+  br label %end
+
+end:
+  %z4 = phi i32 [ %z3, %no2 ], [ 3, %yes2 ]
+  ret i32 %z4
+}