Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -113,6 +113,12 @@ "of sunken Casts"); STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); +STATISTIC(NumMemoryInstsPhiCreated, + "Number of phis created when address " + "computations were sunk to memory instructions"); +STATISTIC(NumMemoryInstsSelectCreated, + "Number of select created when address " + "computations were sunk to memory instructions"); STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumAndsAdded, @@ -194,6 +200,19 @@ cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero.")); +static cl::opt DisableComplexAddrModes( + "disable-complex-addr-modes", cl::Hidden, cl::init(false), + cl::desc("Disables combining addressing modes with different parts " + "in optimizeMemoryInst.")); + +static cl::opt +AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), + cl::desc("Allow creation of Phis in Address sinking.")); + +static cl::opt +AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), + cl::desc("Allow creation of selects in Address sinking.")); + namespace { using SetOfInstrs = SmallPtrSet; @@ -3354,8 +3373,65 @@ Value *PromotedOperand) const; }; +/// \brief Keep track of simplification of Phi nodes. +/// Accept the set of all phi nodes and erase phi node from this set +/// if it is simplified. +class SimplificationTracker { + DenseMap Storage; + const SimplifyQuery &SQ; + SmallPtrSetImpl &AllPhiNodes; + SmallPtrSetImpl &AllSelectNodes; + +public: + SimplificationTracker(const SimplifyQuery &sq, + SmallPtrSetImpl &APN, + SmallPtrSetImpl &ASN) + : SQ(sq), AllPhiNodes(APN), AllSelectNodes(ASN) {} + + Value *Get(Value *V) { + do { + auto SV = Storage.find(V); + if (SV == Storage.end()) + return V; + V = SV->second; + } while (true); + } + + Value *Simplify(Value *Val) { + SmallVector WorkList; + SmallPtrSet Visited; + WorkList.push_back(Val); + while (!WorkList.empty()) { + auto P = WorkList.pop_back_val(); + if (!Visited.insert(P).second) + continue; + if (auto *PI = dyn_cast(P)) + if (Value *V = SimplifyInstruction(cast(PI), SQ)) { + for (auto *U : PI->users()) + WorkList.push_back(cast(U)); + Put(PI, V); + PI->replaceAllUsesWith(V); + if (auto *PHI = dyn_cast(PI)) + AllPhiNodes.erase(PHI); + if (auto *Select = dyn_cast(PI)) + AllSelectNodes.erase(Select); + PI->eraseFromParent(); + } + } + return Get(Val); + } + + void Put(Value *From, Value *To) { + Storage.insert({ From, To }); + } +}; + /// \brief A helper class for combining addressing modes. class AddressingModeCombiner { + typedef std::pair ValueInBB; + typedef DenseMap FoldAddrToValueMapping; + typedef std::pair PHIPair; + private: /// The addressing modes we've collected. SmallVector AddrModes; @@ -3366,7 +3442,19 @@ /// Are the AddrModes that we have all just equal to their original values? bool AllAddrModesTrivial = true; + /// Common Type for all different fields in addressing modes. + Type *CommonType; + + /// SimplifyQuery for simplifyInstruction utility. + const SimplifyQuery &SQ; + + /// Original Address. + ValueInBB Original; + public: + AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue) + : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {} + /// \brief Get the combined AddrMode const ExtAddrMode &getAddrMode() const { return AddrModes[0]; @@ -3434,12 +3522,356 @@ if (AllAddrModesTrivial) return false; - // TODO: Combine multiple AddrModes by inserting a select or phi for the - // field in which the AddrModes differ. - return false; + if (DisableComplexAddrModes) + return false; + + // For now we support only different base registers. + // TODO: enable others. + if (DifferentField != ExtAddrMode::BaseRegField) + return false; + + // Build a map between to + // value of base register. + FoldAddrToValueMapping Map; + initializeMap(Map); + + Value *CommonValue = findCommon(Map); + if (CommonValue) + AddrModes[0].BaseReg = CommonValue; + return CommonValue != nullptr; } -}; +private: + /// \brief Initialize Map with anchor values. For address seen in some BB + /// we set the value of different field saw in this address. + /// If address is not an instruction than basic block is set to null. + /// At the same time we find a common type for different field we will + /// use to create new Phi/Select nodes. Keep it in CommonType field. + void initializeMap(FoldAddrToValueMapping &Map) { + // Keep track of keys where the value is null. We will need to replace it + // with constant null when we know the common type. + SmallVector NullValue; + for (auto &AM : AddrModes) { + BasicBlock *BB = nullptr; + if (Instruction *I = dyn_cast(AM.OriginalValue)) + BB = I->getParent(); + + // For now we support only base register as different field. + // TODO: Enable others. + Value *DV = AM.BaseReg; + if (DV) { + if (CommonType) + assert(CommonType == DV->getType() && "Different types detected!"); + else + CommonType = DV->getType(); + Map[{ AM.OriginalValue, BB }] = DV; + } else { + NullValue.push_back({ AM.OriginalValue, BB }); + } + } + assert(CommonType && "At least one non-null value must be!"); + for (auto VIBB : NullValue) + Map[VIBB] = Constant::getNullValue(CommonType); + } + + /// \brief We have mapping between value A and basic block where value A + /// seen to other value B where B was a field in addressing mode represented + /// by A. Also we have an original value C representin an address in some + /// basic block. Traversing from C through phi and selects we ended up with + /// A's in a map. This utility function tries to find a value V which is a + /// field in addressing mode C and traversing through phi nodes and selects + /// we will end up in corresponded values B in a map. + /// The utility will create a new Phi/Selects if needed. + // The simple example looks as follows: + // BB1: + // p1 = b1 + 40 + // br cond BB2, BB3 + // BB2: + // p2 = b2 + 40 + // br BB3 + // BB3: + // p = phi [p1, BB1], [p2, BB2] + // v = load p + // Map is + // -> b1 + // -> b2 + // Request is + // -> ? + // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3 + Value *findCommon(FoldAddrToValueMapping &Map) { + // Tracks of new created Phi nodes. + SmallPtrSet NewPhiNodes; + // Tracks of new created Select nodes. + SmallPtrSet NewSelectNodes; + // Tracks the simplification of new created phi nodes. The reason we use + // this mapping is because we will add new created Phi nodes in AddrToBase. + // Simplification of Phi nodes is recursive, so some Phi node may + // be simplified after we added it to AddrToBase. + // Using this mapping we can find the current value in AddrToBase. + SimplificationTracker ST(SQ, NewPhiNodes, NewSelectNodes); + + // First step, DFS to create PHI nodes for all intermediate blocks. + // Also fill traverse order for the second step. + SmallVector TraverseOrder; + InsertPlaceholders(Map, TraverseOrder, NewPhiNodes, NewSelectNodes); + + // Second Step, fill new nodes by merged values and simplify if possible. + FillPlaceholders(Map, TraverseOrder, ST); + + if (!AddrSinkNewSelects && NewSelectNodes.size() > 0) { + DestroyNodes(NewPhiNodes); + DestroyNodes(NewSelectNodes); + return nullptr; + } + + // Now we'd like to match New Phi nodes to existed ones. + unsigned PhiNotMatchedCount = 0; + if (!MatchPhiSet(NewPhiNodes, ST, AddrSinkNewPhis, PhiNotMatchedCount)) { + DestroyNodes(NewPhiNodes); + DestroyNodes(NewSelectNodes); + return nullptr; + } + + auto *Result = ST.Get(Map.find(Original)->second); + if (Result) { + NumMemoryInstsPhiCreated += NewPhiNodes.size() + PhiNotMatchedCount; + NumMemoryInstsSelectCreated += NewSelectNodes.size(); + } + return Result; + } + + /// \brief Destroy nodes from a set. + template void DestroyNodes(SmallPtrSetImpl &Instructions) { + // For safe erasing, replace the Phi with dummy value first. + auto Dummy = UndefValue::get(CommonType); + for (auto I : Instructions) { + I->replaceAllUsesWith(Dummy); + I->eraseFromParent(); + } + } + + /// \brief Try to match PHI node to Candidate. + /// Matcher tracks the matched Phi nodes. + bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, + DenseSet &Matcher, + SmallPtrSetImpl &PhiNodesToMatch) { + SmallVector WorkList; + Matcher.insert({ PHI, Candidate }); + WorkList.push_back({ PHI, Candidate }); + SmallSet Visited; + while (!WorkList.empty()) { + auto Item = WorkList.pop_back_val(); + if (!Visited.insert(Item).second) + continue; + // We iterate over all incoming values to Phi to compare them. + // If values are different and both of them Phi and the first one is a + // Phi we added (subject to match) and both of them is in the same basic + // block then we can match our pair if values match. So we state that + // these values match and add it to work list to verify that. + for (auto B : Item.first->blocks()) { + Value *FirstValue = Item.first->getIncomingValueForBlock(B); + Value *SecondValue = Item.second->getIncomingValueForBlock(B); + if (FirstValue == SecondValue) + continue; + + PHINode *FirstPhi = dyn_cast(FirstValue); + PHINode *SecondPhi = dyn_cast(SecondValue); + + // One of them is not Phi or + // The first one is not Phi node from the set we'd like to match or + // Phi nodes from different basic blocks then + // we will not be able to match. + if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) || + FirstPhi->getParent() != SecondPhi->getParent()) + return false; + + // If we already matched them then continue. + if (Matcher.count({ FirstPhi, SecondPhi })) + continue; + // So the values are different and does not match. So we need them to + // match. + Matcher.insert({ FirstPhi, SecondPhi }); + // But me must check it. + WorkList.push_back({ FirstPhi, SecondPhi }); + } + } + return true; + } + + /// \brief For the given set of PHI nodes try to find their equivalents. + /// Returns false if this matching fails and creation of new Phi is disabled. + bool MatchPhiSet(SmallPtrSetImpl &PhiNodesToMatch, + SimplificationTracker &ST, bool AllowNewPhiNodes, + unsigned &PhiNotMatchedCount) { + DenseSet Matched; + SmallPtrSet WillNotMatch; + while (PhiNodesToMatch.size()) { + PHINode *PHI = *PhiNodesToMatch.begin(); + + // Add us, if no Phi nodes in the basic block we do not match. + WillNotMatch.clear(); + WillNotMatch.insert(PHI); + + // Traverse all Phis until we found equivalent or fail to do that. + bool IsMatched = false; + for (auto &P : PHI->getParent()->phis()) { + if (&P == PHI) + continue; + if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch))) + break; + // If it does not match, collect all Phi nodes from matcher. + // if we end up with no match, them all these Phi nodes will not match + // later. + for (auto M : Matched) + WillNotMatch.insert(M.first); + Matched.clear(); + } + if (IsMatched) { + // Replace all matched values and erase them. + for (auto MV : Matched) { + MV.first->replaceAllUsesWith(MV.second); + PhiNodesToMatch.erase(MV.first); + ST.Put(MV.first, MV.second); + MV.first->eraseFromParent(); + } + Matched.clear(); + continue; + } + // If we are not allowed to create new nodes then bail out. + if (!AllowNewPhiNodes) + return false; + // Just remove all seen values in matcher. They will not match anything. + PhiNotMatchedCount += WillNotMatch.size(); + for (auto *P : WillNotMatch) + PhiNodesToMatch.erase(P); + } + return true; + } + /// \brief Fill the placeholder with values from predecessors and simplify it. + void FillPlaceholders(FoldAddrToValueMapping &Map, + SmallVectorImpl &TraverseOrder, + SimplificationTracker &ST) { + while (!TraverseOrder.empty()) { + auto Current = TraverseOrder.pop_back_val(); + assert(Map.find(Current) != Map.end() && "No node to fill!!!"); + Value *CurrentValue = Current.first; + BasicBlock *CurrentBlock = Current.second; + Value *V = Map[Current]; + + if (SelectInst *Select = dyn_cast(V)) { + // CurrentValue also must be Select. + auto *CurrentSelect = cast(CurrentValue); + auto *TrueValue = CurrentSelect->getTrueValue(); + ValueInBB TrueItem = { TrueValue, isa(TrueValue) + ? CurrentBlock + : nullptr }; + assert(Map.find(TrueItem) != Map.end() && "No True Value!"); + Select->setTrueValue(Map[TrueItem]); + auto *FalseValue = CurrentSelect->getFalseValue(); + ValueInBB FalseItem = { FalseValue, isa(FalseValue) + ? CurrentBlock + : nullptr }; + assert(Map.find(FalseItem) != Map.end() && "No False Value!"); + Select->setFalseValue(Map[FalseItem]); + } else { + // Must be a Phi node then. + PHINode *PHI = cast(V); + // Fill the Phi node with values from predecessors. + bool IsDefinedInThisBB = + cast(CurrentValue)->getParent() == CurrentBlock; + auto *CurrentPhi = dyn_cast(CurrentValue); + for (auto B : predecessors(CurrentBlock)) { + Value *PV = IsDefinedInThisBB + ? CurrentPhi->getIncomingValueForBlock(B) + : CurrentValue; + ValueInBB item = { PV, isa(PV) ? B : nullptr }; + assert(Map.find(item) != Map.end() && "No predecessor Value!"); + PHI->addIncoming(ST.Get(Map[item]), B); + } + } + // Simplify if possible. + Map[Current] = ST.Simplify(V); + } + } + + /// Starting from value recursively iterates over predecessors up to known + /// ending values represented in a map. For each traversed block inserts + /// a placeholder Phi or Select. + /// Reports all new created Phi/Select nodes by adding them to set. + /// Also reports and order in what basic blocks have been traversed. + void InsertPlaceholders(FoldAddrToValueMapping &Map, + SmallVectorImpl &TraverseOrder, + SmallPtrSetImpl &NewPhiNodes, + SmallPtrSetImpl &NewSelectNodes) { + SmallVector Worklist; + assert((isa(Original.first) || isa(Original.first)) && + "Address must be a Phi or Select node"); + auto *Dummy = UndefValue::get(CommonType); + Worklist.push_back(Original); + while (!Worklist.empty()) { + auto Current = Worklist.pop_back_val(); + // If value is not an instruction it is something global, constant, + // parameter and we can say that this value is observable in any block. + // Set block to null to denote it. + // Also please take into account that it is how we build anchors. + if (!isa(Current.first)) + Current.second = nullptr; + // if it is already visited or it is an ending value then skip it. + if (Map.find(Current) != Map.end()) + continue; + TraverseOrder.push_back(Current); + + Value *CurrentValue = Current.first; + BasicBlock *CurrentBlock = Current.second; + // CurrentValue must be a Phi node or select. All others must be covered + // by anchors. + Instruction *CurrentI = cast(CurrentValue); + bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock; + + unsigned PredCount = + std::distance(pred_begin(CurrentBlock), pred_end(CurrentBlock)); + // if Current Value is not defined in this basic block we are interested + // in values in predecessors. + if (!IsDefinedInThisBB) { + assert(PredCount && "Unreachable block?!"); + PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", + &CurrentBlock->front()); + Map[Current] = PHI; + NewPhiNodes.insert(PHI); + // Add all predecessors in work list. + for (auto B : predecessors(CurrentBlock)) + Worklist.push_back({ CurrentValue, B }); + continue; + } + // Value is defined in this basic block. + if (SelectInst *OrigSelect = dyn_cast(CurrentI)) { + // Is it OK to get metadata from OrigSelect?! + // Create a Select placeholder with dummy value. + SelectInst *Select = + SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy, + OrigSelect->getName(), OrigSelect, OrigSelect); + Map[Current] = Select; + NewSelectNodes.insert(Select); + // We are interested in True and False value in this basic block. + Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock }); + Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock }); + } else { + // It must be a Phi node then. + auto *CurrentPhi = cast(CurrentI); + // Create new Phi node for merge of bases. + assert(PredCount && "Unreachable block?!"); + PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", + &CurrentBlock->front()); + Map[Current] = PHI; + NewPhiNodes.insert(PHI); + + // Add all predecessors in work list. + for (auto B : predecessors(CurrentBlock)) + Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B }); + } + } + } +}; } // end anonymous namespace /// Try adding ScaleReg*Scale to the current addressing mode. @@ -4532,7 +4964,8 @@ // the graph are compatible. bool PhiOrSelectSeen = false; SmallVector AddrModeInsts; - AddressingModeCombiner AddrModes; + AddressingModeCombiner AddrModes({ *DL, TLInfo }, + { Addr, MemoryInst->getParent() }); TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); Index: test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll =================================================================== --- /dev/null +++ test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll @@ -0,0 +1,18 @@ +; RUN: opt -S -codegenprepare -mtriple=thumbv7m -disable-complex-addr-modes=false < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; Select between two geps with different base, same constant offset +define void @test_select_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) { +; CHECK-LABEL: @test_select_twogep_base +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + Index: test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll =================================================================== --- /dev/null +++ test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll @@ -0,0 +1,389 @@ +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-YES +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=false %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NO +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; Can we sink for different base if there is no phi for base? +define i32 @test1(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test1 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: load + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base? +define i32 @test2(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test2 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK: getelementptr i8, {{.+}} 40 + %b = phi i64* [%b1, %entry], [%b2, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base but not valid one? +define i32 @test3(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test3 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: load + %b = phi i64* [%b2, %entry], [%b1, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if both addresses are in the same block? +define i32 @test4(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test4 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: load + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base? +; Both addresses are in the same block. +define i32 @test5(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test5 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK: getelementptr i8, {{.+}} 40 + %b = phi i64* [%b1, %entry], [%b2, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base but not valid one? +; Both addresses are in the same block. +define i32 @test6(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test6 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: load + %b = phi i64* [%b2, %entry], [%b1, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; case with a loop. No phi node. +define i32 @test7(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test7 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-YES: sunk_phi + %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough] + %c3 = phi i32* [%c1, %entry], [%c, %fallthrough] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: load + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %v +} + +; case with a loop. There is phi node. +define i32 @test8(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test8 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough] + %c3 = phi i32* [%c1, %entry], [%c, %fallthrough] + %b3 = phi i64* [%b1, %entry], [%b, %fallthrough] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK: getelementptr i8, {{.+}} 40 + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %b = phi i64* [%b3, %loop], [%b2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %v +} + +; case with a loop. There is phi node but it does not fit. +define i32 @test9(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test9 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-YES: sunk_phi + %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough] + %c3 = phi i32* [%c1, %entry], [%c, %fallthrough] + %b3 = phi i64* [%b1, %entry], [%b2, %fallthrough] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: load + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %b = phi i64* [%b3, %loop], [%b2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %v +} + +; Case through a loop. No phi node. +define i32 @test10(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test10 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: br + %c = phi i32* [%c1, %entry], [%c2, %if.then] + br label %loop + +loop: + %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop] + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: +; CHECK-YES: sunkaddr + %v = load volatile i32, i32* %c, align 4 + ret i32 %v +} + +; Case through a loop. There is a phi. +define i32 @test11(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test11 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK: phi +; CHECK: phi +; CHECK: br + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %b = phi i64* [%b1, %entry], [%b2, %if.then] + br label %loop + +loop: + %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop] + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: +; CHECK: sunkaddr + %v = load volatile i32, i32* %c, align 4 + ret i32 %v +} + +; Complex case with address value from previous iteration. +define i32 @test12(i32 %N, i1 %cond, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test12 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-YES: sunk_phi +; CHECK-NO: phi +; CHECK-NO: phi +; CHECK-NO: phi +; CHECK-NO: br + %iv = phi i32 [0, %entry], [%iv.inc, %backedge] + %c3 = phi i32* [%c1, %entry], [%c, %backedge] + %b4 = phi i64* [%b1, %entry], [%b5, %backedge] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-LABEL: fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO: phi +; CHECK-NO: phi +; CHECK-NO: load + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %b6 = phi i64* [%b4, %loop], [%b2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %a4 = getelementptr inbounds i64, i64* %b4, i64 5 + %c4 = bitcast i64* %a4 to i32* + %cmp = icmp slt i32 %iv, 20 + br i1 %cmp, label %backedge, label %if.then.2 + +if.then.2: + br label %backedge + +backedge: + %b5 = phi i64* [%b4, %fallthrough], [%b6, %if.then.2] + %iv.inc = add i32 %iv, 1 + %cmp2 = icmp slt i32 %iv.inc, %N + br i1 %cmp2, label %loop, label %exit + +exit: + ret i32 %v +} + + +%struct.S = type {i32, i32} +; Case with index +define i32 @test13(i1 %cond, %struct.S* %b1, %struct.S* %b2, i64 %Index) { +; CHECK-LABEL: @test13 +entry: + %a1 = getelementptr inbounds %struct.S, %struct.S* %b1, i64 %Index, i32 1 + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %i2 = mul i64 %Index, 2 + %a2 = getelementptr inbounds %struct.S, %struct.S* %b2, i64 %Index, i32 1 + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: load + %a = phi i32* [%a1, %entry], [%a2, %if.then] + %v = load i32, i32* %a, align 4 + ret i32 %v +}