Index: include/llvm/Transforms/Scalar/GVNExpression.h =================================================================== --- include/llvm/Transforms/Scalar/GVNExpression.h +++ include/llvm/Transforms/Scalar/GVNExpression.h @@ -478,10 +478,13 @@ class PHIExpression final : public BasicExpression { private: BasicBlock *BB; + // True if all the operands of this phi expression actually exist in the + // program, as opposed to having been temporarily created by GVN. + bool AllRealOps; public: PHIExpression(unsigned NumOperands, BasicBlock *B) - : BasicExpression(NumOperands, ET_Phi), BB(B) {} + : BasicExpression(NumOperands, ET_Phi), BB(B), AllRealOps(false) {} PHIExpression() = delete; PHIExpression(const PHIExpression &) = delete; PHIExpression &operator=(const PHIExpression &) = delete; @@ -491,6 +494,9 @@ return EB->getExpressionType() == ET_Phi; } + bool isAllRealOps() const { return AllRealOps; } + void setAllRealOps(bool B) { AllRealOps = B; } + bool equals(const Expression &Other) const override { if (!this->BasicExpression::equals(Other)) return false; Index: lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- lib/Transforms/Scalar/NewGVN.cpp +++ lib/Transforms/Scalar/NewGVN.cpp @@ -34,6 +34,15 @@ /// performing symbolic evaluation, forward propagation, and simplification of /// operations based on the value numbers deduced so far. /// +/// In order to make the GVN complete, we use a technique derived from +/// "Detection of Redundant Expressions: A Complete and Polynomial-time +/// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most SSA +/// based GVN algorithms is related to their inability to detect equivalence +/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)). +/// We resolve this issue by generating the equivalent "phi of ops" form for +/// each +/// op of phis we see, in a way that only takes polynomial time to resolve. +/// /// We also do not perform elimination by using any published algorithm. All /// published algorithms are O(Instructions). Instead, we use a technique that /// is O(number of operations with the same value number), enabling us to skip @@ -138,7 +147,8 @@ // It also wants to hand us SCC's that are unrelated to the phi node we ask // about, and have us process them there or risk redoing work. // Graph traits over a filter iterator also doesn't work that well here. -// This SCC finder is specialized to walk use-def chains, and only follows instructions, +// This SCC finder is specialized to walk use-def chains, and only follows +// instructions, // not generic values (arguments, etc). struct TarjanSCC { @@ -170,8 +180,10 @@ Root[I] = std::min(Root.lookup(I), Root.lookup(Op)); } } - // See if we really were the root of a component, by seeing if we still have our DFSNumber. - // If we do, we are the root of the component, and we have completed a component. If we do not, + // See if we really were the root of a component, by seeing if we still have + // our DFSNumber. + // If we do, we are the root of the component, and we have completed a + // component. If we do not, // we are not the root of a component, and belong on the component stack. if (Root.lookup(I) == OurDFS) { unsigned ComponentID = Components.size(); @@ -422,6 +434,28 @@ // Value Mappings. DenseMap ValueToClass; DenseMap ValueToExpression; + // Value PHI handling, used to make equivalence between phi(op, op) and + // op(phi, phi). + // These mappings just store various data that would normally be part of the + // IR. + + // Map a temporary instruction we created to a parent block. + DenseMap TempToBlock; + // Map between the temporary phis we created and the real instructions they + // are known equivalent to. + DenseMap RealToTemp; + DenseMap TempToReal; + // Map from basic block to the temporary operations we created + DenseMap> PHIOfOpsOps; + DenseMap> PHIOfOpsPHIs; + // Map from temporary operation to memoryaccess. + DenseMap TempToMemory; + // Map from memoryaccess to set of temporary instructions with that + // memoryaccess, since they won't appear as Users. + DenseMap> + MemoryToTempUsers; + // Set of all temporary instructions we created. + DenseSet AllTempInstructions; // Mapping from predicate info we used to the instructions we used it with. // In order to correctly ensure propagation, we must keep track of what @@ -510,7 +544,7 @@ const Expression *createExpression(Instruction *); const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *); PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge, - bool &AllConstant); + bool &OriginalOpsConstant); const VariableExpression *createVariableExpression(Value *); const ConstantExpression *createConstantExpression(Constant *); const Expression *createVariableOrConstant(Value *V); @@ -549,6 +583,10 @@ return CClass; } void initializeCongruenceClasses(Function &F); + bool makePossiblePhiOfOps(Instruction *, bool); + void addPhiOfOpsOrOp(Instruction *Op, SmallVectorImpl &Vect, + BasicBlock *BB, MemoryUseOrDef *MemAccess, + Value *ExistingValue = nullptr); // Value number an Instruction or MemoryPhi. void valueNumberMemoryPhi(MemoryPhi *); @@ -625,12 +663,15 @@ // Utilities. void cleanupTables(); std::pair assignDFSNumbers(BasicBlock *, unsigned); - void updateProcessedCount(Value *V); + void updateProcessedCount(const Value *V); void verifyMemoryCongruency() const; void verifyIterationSettled(Function &F); bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const; BasicBlock *getBlockForValue(Value *V) const; void deleteExpression(const Expression *E); + MemoryUseOrDef *getMemoryAccess(const Instruction *) const; + MemoryPhi *getMemoryAccess(const BasicBlock *) const; + template T *getMinDFSOfRange(const Range &) const; unsigned InstrToDFSNum(const Value *V) const { assert(isa(V) && "This should not be used for MemoryAccesses"); return InstrDFS.lookup(V); @@ -651,7 +692,7 @@ : InstrDFS.lookup(MA); } bool isCycleFree(const PHINode *PN); - template T *getMinDFSOfRange(const Range &) const; + bool isBackedge(BasicBlock *From, BasicBlock *To) const; // Debug counter info. When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. std::pair StartingVNCounter; @@ -679,20 +720,48 @@ return true; } +// Determine if the edge From->To is a backedge +bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const { + if (From == To) + return true; + auto *FromDTN = DT->getNode(From); + auto *ToDTN = DT->getNode(To); + return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN); +} + #ifndef NDEBUG static std::string getBlockName(const BasicBlock *B) { return DOTGraphTraits::getSimpleNodeLabel(B, nullptr); } #endif +// Get a MemoryAccess for an instruction, fake or real. +MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const { + auto *Result = MSSA->getMemoryAccess(I); + if (Result) + return Result; + return TempToMemory.lookup(I); +} + +// Get a MemoryPhi for a basic block. These are all real. +MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const { + return MSSA->getMemoryAccess(BB); +} + // Get the basic block from an instruction/memory value. BasicBlock *NewGVN::getBlockForValue(Value *V) const { - if (auto *I = dyn_cast(V)) - return I->getParent(); - else if (auto *MP = dyn_cast(V)) - return MP->getBlock(); - llvm_unreachable("Should have been able to figure out a block for our value"); - return nullptr; + if (auto *I = dyn_cast(V)) { + auto *Parent = I->getParent(); + if (Parent) + return Parent; + Parent = TempToBlock.lookup(V); + assert(Parent && "Every fake instruction should have a block"); + return Parent; + } + + auto *MP = dyn_cast(V); + assert(MP && "Should have been an instruction or a MemoryPhi"); + return MP->getBlock(); } // Delete a definitely dead expression, so it can be reused by the expression @@ -704,10 +773,9 @@ const_cast(BE)->deallocateOperands(ArgRecycler); ExpressionAllocator.Deallocate(E); } - PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, - bool &AllConstant) { - BasicBlock *PHIBlock = I->getParent(); + bool &OriginalOpsConstant) { + BasicBlock *PHIBlock = getBlockForValue(I); auto *PN = cast(I); auto *E = new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); @@ -716,26 +784,34 @@ E->setType(I->getType()); E->setOpcode(I->getOpcode()); - unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock)); - // Filter out unreachable phi operands. auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) { return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock}); }); + bool AllRealOps = true; + std::transform( + Filtered.begin(), Filtered.end(), op_inserter(E), + [&](const Use &U) -> Value * { + auto *BB = PN->getIncomingBlock(U); + HasBackedge = HasBackedge || isBackedge(BB, PHIBlock); + OriginalOpsConstant = + OriginalOpsConstant && (isa(U) || isa(U)); + + // Don't try to transform self-defined phis. + if (U == PN) { + AllRealOps = false; + return PN; + } + + Value *LOL = lookupOperandLeader(U); + AllRealOps = AllRealOps && + (isa(LOL) || + (isa(LOL) && + !AllTempInstructions.count(cast(LOL)))); + return LOL; - std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), - [&](const Use &U) -> Value * { - auto *BB = PN->getIncomingBlock(U); - auto *DTN = DT->getNode(BB); - if (RPOOrdering.lookup(DTN) >= PHIRPO) - HasBackedge = true; - AllConstant &= isa(U) || isa(U); - - // Don't try to transform self-defined phis. - if (U == PN) - return PN; - return lookupOperandLeader(U); - }); + }); + E->setAllRealOps(AllRealOps); return E; } @@ -754,7 +830,7 @@ // whether all members are constant. std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) { auto Operand = lookupOperandLeader(O); - AllConstant &= isa(Operand); + AllConstant = AllConstant && isa(Operand); return Operand; }); @@ -1069,7 +1145,7 @@ // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast(I); - auto *StoreAccess = MSSA->getMemoryAccess(SI); + auto *StoreAccess = getMemoryAccess(SI); // Get the expression, if any, for the RHS of the MemoryDef. const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess(); if (EnableStoreRefinement) @@ -1106,7 +1182,7 @@ dyn_cast(lookupOperandLeader(SI->getValueOperand()))) { if ((lookupOperandLeader(LI->getPointerOperand()) == lookupOperandLeader(SI->getPointerOperand())) && - (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) == + (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) == StoreRHS)) return createVariableExpression(LI); } @@ -1210,8 +1286,9 @@ // Load of undef is undef. if (isa(LoadAddressLeader)) return createConstantExpression(UndefValue::get(LI->getType())); - - MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I); + MemoryAccess *OriginalAccess = getMemoryAccess(I); + MemoryAccess *DefiningAccess = + MSSAWalker->getClobberingMemoryAccess(OriginalAccess); if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { if (auto *MD = dyn_cast(DefiningAccess)) { @@ -1662,6 +1739,14 @@ return createExpression(I); } +// Return true if V is a value that will always be available (IE can +// be placed anywhere) in the function. We don't do globals here +// because they are often worse to put in place. +// TODO: Separate cost from availability +static bool alwaysAvailable(Value *V) { + return isa(V) || isa(V); +} + // Substitute and symbolize the value before value numbering. const Expression *NewGVN::performSymbolicEvaluation(Value *V) { const Expression *E = nullptr; @@ -1738,6 +1823,21 @@ return nullptr; } } + // For the moment, we only make op of phis to be equivalent to phi of ops if + // the op of phis has generated something better. Once we have finished + // completeness, we should be able to always convert one to the other. + if (auto *RTV = RealToTemp.lookup(V)) { + if (E && !isa(E) && !isa(E)) { + auto *LOL = lookupOperandLeader(RTV); + if (alwaysAvailable(LOL)) + return createVariableOrConstant(lookupOperandLeader(RTV)); + auto *PHIE = + dyn_cast_or_null(ValueToExpression.lookup(RTV)); + if (PHIE && PHIE->isAllRealOps()) + return createVariableOrConstant(RTV); + } + } + return E; } @@ -1763,6 +1863,11 @@ return; for (auto U : MA->users()) TouchedInstructions.set(MemoryToDFSNum(U)); + const auto TempResult = MemoryToTempUsers.find(MA); + if (TempResult != MemoryToTempUsers.end()) + for (auto *User : TempResult->second) + TouchedInstructions.set(InstrToDFSNum(User)); + const auto Result = MemoryToUsers.find(MA); if (Result != MemoryToUsers.end()) { for (auto *User : Result->second) @@ -1829,11 +1934,11 @@ "Can't get next leader if there is none"); if (CC->getStoreCount() > 0) { if (auto *NL = dyn_cast_or_null(CC->getNextLeader().first)) - return MSSA->getMemoryAccess(NL); + return getMemoryAccess(NL); // Find the store with the minimum DFS number. auto *V = getMinDFSOfRange(make_filter_range( *CC, [&](const Value *V) { return isa(V); })); - return MSSA->getMemoryAccess(cast(V)); + return getMemoryAccess(cast(V)); } assert(CC->getStoreCount() == 0); @@ -1917,6 +2022,8 @@ CongruenceClass *NewClass) { if (I == OldClass->getNextLeader().first) OldClass->resetNextLeader(); + OldClass->erase(I); + NewClass->insert(I); // It's possible, though unlikely, for us to discover equivalences such // that the current leader does not dominate the old one. @@ -1932,17 +2039,17 @@ Dominated = Dominated || DT->properlyDominates(IBB, NCBB); if (Dominated) { ++NumGVNNotMostDominatingLeader; +#if 0 assert( !isa(I) && "New class for instruction should not be dominated by instruction"); +#endif } } if (NewClass->getLeader() != I) NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)}); - OldClass->erase(I); - NewClass->insert(I); // Handle our special casing of stores. if (auto *SI = dyn_cast(I)) { OldClass->decStoreCount(); @@ -1977,7 +2084,7 @@ // instructions before. // If it's not a memory use, set the MemoryAccess equivalence - auto *InstMA = dyn_cast_or_null(MSSA->getMemoryAccess(I)); + auto *InstMA = dyn_cast_or_null(getMemoryAccess(I)); bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA; if (InstMA) moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass); @@ -2099,10 +2206,14 @@ if (ClassChanged || LeaderChanged) { DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E << "\n"); - if (ClassChanged) + if (ClassChanged) { moveValueToNewCongruenceClass(I, E, IClass, EClass); + if (auto *VTR = TempToReal.lookup(I)) + TouchedInstructions.set(InstrToDFSNum(VTR)); + } + markUsersTouched(I); - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + if (MemoryAccess *MA = getMemoryAccess(I)) markMemoryUsersTouched(MA); if (auto *CI = dyn_cast(I)) markPredicateUsersTouched(CI); @@ -2128,7 +2239,7 @@ // impact predicates. Otherwise, only mark the phi nodes as touched, as // they are the only thing that depend on new edges. Anything using their // values will get propagated to if necessary. - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To)) + if (MemoryAccess *MemPhi = getMemoryAccess(To)) TouchedInstructions.set(InstrToDFSNum(MemPhi)); auto BI = To->begin(); @@ -2225,7 +2336,7 @@ // This also may be a memory defining terminator, in which case, set it // equivalent only to itself. // - auto *MA = MSSA->getMemoryAccess(TI); + auto *MA = getMemoryAccess(TI); if (MA && !isa(MA)) { auto *CC = ensureLeaderOfMemoryClass(MA); if (setMemoryClass(MA, CC)) @@ -2234,6 +2345,82 @@ } } +void NewGVN::addPhiOfOpsOrOp(Instruction *Op, + SmallVectorImpl &Vect, + BasicBlock *BB, MemoryUseOrDef *MemAccess, + Value *ExistingValue) { + AllTempInstructions.insert(Op); + Vect.push_back(Op); + TempToBlock[Op] = BB; + if (MemAccess) { + TempToMemory[Op] = MemAccess; + MemoryToTempUsers[MemAccess->getDefiningAccess()].insert(Op); + } + if (ExistingValue) { + RealToTemp[ExistingValue] = Op; + TempToReal[Op] = ExistingValue; + } +} + +// When we see an instruction that is an op of phis, generate the equivalent phi +// of ops form. +bool NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge) { + if (!isa(I) && !isa(I) && !isa(I)) + return false; + // Pretty much all of the instructions we can convert to phi of ops over a + // backedge that are adds, are really induction variables, and those are + // pretty much pointless to convert. This is very coarse-grained for a + // test, so if we do find some value, we can change it later. + // But otherwise, what can happen is we convert the induction variable from + // + // i = phi (0, tmp) + // tmp = i + 1 + // + // to + // i = phi (0, tmpphi) + // tmpphi = phi(1, tmpphi+1) + // + // Which we don't want to happen. We could just avoid this for all non-cycle + // free phis, and we made go that route. + if (HasBackedge && I->getOpcode() == Instruction::Add) + return false; + + SmallPtrSet ProcessedPHIs; + // TODO: We don't do phi translation on memory accesses because it's + // complicated. For a load, we'd need to be able to simulate a new memoryuse, + // which we don't have a good way of doing ATM. + auto *MemAccess = MSSA->getMemoryAccess(I); + // If the memory operation is defined by a memory operation this block that + // isn't a MemoryPhi, transforming the pointer backwards through a scalar phi + // can't + // help, as it would still be killed by that memory operation. + if (MemAccess && !isa(MemAccess) && + MemAccess->getDefiningAccess()->getBlock() == I->getParent()) + return false; + // Convert op of phis to phi of ops + for (auto &Op : I->operands()) { + if (!isa(Op)) + continue; + auto *OpPHI = cast(Op); + auto *PHIBlock = getBlockForValue(OpPHI); + PHINode *ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands()); + addPhiOfOpsOrOp(ValuePHI, PHIOfOpsPHIs[PHIBlock], PHIBlock, nullptr, I); + for (auto PredBB : OpPHI->blocks()) { + Instruction *ValueOp = I->clone(); + addPhiOfOpsOrOp(ValueOp, PHIOfOpsOps[PredBB], PredBB, MemAccess); + for (auto &Op : ValueOp->operands()) + Op = Op->DoPHITranslation(PHIBlock, PredBB); + ValuePHI->addIncoming(ValueOp, PredBB); + DEBUG(dbgs() << "Created phi of ops operand " << *ValueOp << " in " + << getBlockName(PredBB) << "\n"); + } + DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I + << "\n"); + return true; + } + return false; +} + // The algorithm initially places the values of the routine in the TOP // congruence class. The leader of TOP is the undetermined value `undef`. // When the algorithm has finished, values still in TOP are unreachable. @@ -2254,12 +2441,15 @@ MemoryAccessToClass[MSSA->getLiveOnEntryDef()] = createMemoryClass(MSSA->getLiveOnEntryDef()); - for (auto &B : F) { + // Set of instructions already considered for conversion to phi of ops + SmallPtrSet VisitedPhiOfOps; + for (auto DTN : nodes(DT)) { + BasicBlock *BB = DTN->getBlock(); // All MemoryAccesses are equivalent to live on entry to start. They must // be initialized to something so that initial changes are noticed. For // the maximal answer, we initialize them all to be the same as // liveOnEntry. - auto *MemoryBlockDefs = MSSA->getBlockDefs(&B); + auto *MemoryBlockDefs = MSSA->getBlockDefs(BB); if (MemoryBlockDefs) for (const auto &Def : *MemoryBlockDefs) { MemoryAccessToClass[&Def] = TOPClass; @@ -2274,7 +2464,19 @@ if (MD && isa(MD->getMemoryInst())) TOPClass->incStoreCount(); } - for (auto &I : B) { + bool HasBackedge = llvm::any_of(predecessors(BB), [&](BasicBlock *Pred) { + return isBackedge(Pred, BB); + }); + + for (auto &I : *BB) { + // TODO: Move to helper + if (isa(&I)) + for (auto *U : I.users()) + if (auto *UInst = dyn_cast(U)) + if (!isInstructionTriviallyDead(UInst, TLI) && + VisitedPhiOfOps.insert(U).second) + makePossiblePhiOfOps(UInst, HasBackedge); + // Don't insert void terminators into the class. We don't value number // them, and they just end up sitting in TOP. if (isa(I) && I.getType()->isVoidTy()) @@ -2284,6 +2486,19 @@ } } + for (const auto &BlockOps : PHIOfOpsOps) { + for (auto &Op : BlockOps.second) { + TOPClass->insert(Op); + ValueToClass[Op] = TOPClass; + } + } + for (const auto &BlockPHIs : PHIOfOpsPHIs) { + for (auto &PHI : BlockPHIs.second) { + TOPClass->insert(PHI); + ValueToClass[PHI] = TOPClass; + } + } + // Initialize arguments to be in their own unique congruence classes for (auto &FA : F.args()) createSingletonCongruenceClass(&FA); @@ -2299,12 +2514,36 @@ CongruenceClasses[i] = nullptr; } + // Destroy the value expressions + SmallVector TempInst(AllTempInstructions.begin(), + AllTempInstructions.end()); + AllTempInstructions.clear(); + + // We have to drop all references for everything first, so there are no uses + // left as we delete them. + for (auto *I : TempInst) { + I->dropAllReferences(); + } + + while (!TempInst.empty()) { + auto *I = TempInst.back(); + TempInst.pop_back(); + delete I; + } + ValueToClass.clear(); ArgRecycler.clear(ExpressionAllocator); ExpressionAllocator.Reset(); CongruenceClasses.clear(); ExpressionToClass.clear(); ValueToExpression.clear(); + RealToTemp.clear(); + TempToReal.clear(); + TempToBlock.clear(); + TempToMemory.clear(); + MemoryToTempUsers.clear(); + PHIOfOpsOps.clear(); + PHIOfOpsPHIs.clear(); ReachableBlocks.clear(); ReachableEdges.clear(); #ifndef NDEBUG @@ -2320,14 +2559,27 @@ MemoryToUsers.clear(); } +// Assign local DFS number mapping to instructions, and leave space for Value +// PHI's. std::pair NewGVN::assignDFSNumbers(BasicBlock *B, unsigned Start) { unsigned End = Start; - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) { + if (MemoryAccess *MemPhi = getMemoryAccess(B)) { InstrDFS[MemPhi] = End++; DFSToInstr.emplace_back(MemPhi); } + // Value PHIs go first. + const auto PHIResult = PHIOfOpsPHIs.find(B); + if (PHIResult != PHIOfOpsPHIs.end()) { + const auto &Ops = PHIResult->second; + for (auto I : Ops) { + InstrDFS[I] = End++; + DFSToInstr.push_back(I); + } + } + + // Then the real block goes next. for (auto &I : *B) { // There's no need to call isInstructionTriviallyDead more than once on // an instruction. Therefore, once we know that an instruction is dead @@ -2338,18 +2590,27 @@ markInstructionForDeletion(&I); continue; } - InstrDFS[&I] = End++; DFSToInstr.emplace_back(&I); } + // And lastly, any value expressions we may need. + const auto OpResult = PHIOfOpsOps.find(B); + if (OpResult != PHIOfOpsOps.end()) { + const auto &Ops = OpResult->second; + for (auto I : Ops) { + InstrDFS[I] = End++; + DFSToInstr.push_back(I); + } + } + // All of the range functions taken half-open ranges (open on the end side). // So we do not subtract one from count, because at this point it is one // greater than the last instruction. return std::make_pair(Start, End); } -void NewGVN::updateProcessedCount(Value *V) { +void NewGVN::updateProcessedCount(const Value *V) { #ifndef NDEBUG if (ProcessedCount.count(V) == 0) { ProcessedCount.insert({V, 1}); @@ -2429,10 +2690,8 @@ } // If we couldn't come up with a symbolic expression, use the unknown // expression - if (Symbolized == nullptr) { + if (Symbolized == nullptr) Symbolized = createUnknownExpression(I); - } - performCongruenceFinding(I, Symbolized); } else { // Handle terminators that return values. All of them produce values we @@ -2626,7 +2885,7 @@ // Nothing set, nothing to iterate, just return. if (FirstInstr == -1) return; - BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); + const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); while (TouchedInstructions.any()) { ++Iterations; // Walk through all the instructions in all the blocks in RPO. @@ -2644,7 +2903,7 @@ } Value *V = InstrFromDFSNum(InstrNum); - BasicBlock *CurrBlock = getBlockForValue(V); + const BasicBlock *CurrBlock = getBlockForValue(V); // If we hit a new block, do reachability processing. if (CurrBlock != LastBlock) { @@ -2718,26 +2977,17 @@ }); } + // This must happen after we initialize RPO but before we assign DFS numbers. + initializeCongruenceClasses(F); + // Now a standard depth first ordering of the domtree is equivalent to RPO. - auto DFI = df_begin(DT->getRootNode()); - for (auto DFE = df_end(DT->getRootNode()); DFI != DFE; ++DFI) { - BasicBlock *B = DFI->getBlock(); + for (auto DTN : depth_first(DT->getRootNode())) { + BasicBlock *B = DTN->getBlock(); const auto &BlockRange = assignDFSNumbers(B, ICount); BlockInstRange.insert({B, BlockRange}); ICount += BlockRange.second - BlockRange.first; } - // Handle forward unreachable blocks and figure out which blocks - // have single preds. - for (auto &B : F) { - // Assign numbers to unreachable blocks. - if (!DFI.nodeVisited(DT->getNode(&B))) { - const auto &BlockRange = assignDFSNumbers(&B, ICount); - BlockInstRange.insert({&B, BlockRange}); - ICount += BlockRange.second - BlockRange.first; - } - } - TouchedInstructions.resize(ICount); // Ensure we don't end up resizing the expressionToClass map, as // that can be quite expensive. At most, we have one expression per @@ -2747,9 +2997,10 @@ // Initialize the touched instructions to include the entry block. const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock()); TouchedInstructions.set(InstRange.first, InstRange.second); + DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock()) + << " marked reachable\n"); ReachableBlocks.insert(&F.getEntryBlock()); - initializeCongruenceClasses(F); iterateTouchedInstructions(); verifyMemoryCongruency(); verifyIterationSettled(F); @@ -2761,7 +3012,8 @@ if (!ToErase->use_empty()) ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType())); - ToErase->eraseFromParent(); + if (ToErase->getParent()) + ToErase->eraseFromParent(); } // Delete all unreachable blocks. @@ -2780,14 +3032,6 @@ return Changed; } -// Return true if V is a value that will always be available (IE can -// be placed anywhere) in the function. We don't do globals here -// because they are often worse to put in place. -// TODO: Separate cost from availability -static bool alwaysAvailable(Value *V) { - return isa(V) || isa(V); -} - struct NewGVN::ValueDFS { int DFSIn = 0; int DFSOut = 0; @@ -2896,7 +3140,7 @@ // they are from. VDUse.LocalNum = InstrDFS.size() + 1; } else { - IBlock = I->getParent(); + IBlock = getBlockForValue(I); VDUse.LocalNum = InstrToDFSNum(I); } @@ -3114,7 +3358,7 @@ // Map to store the use counts DenseMap UseCounts; - for (CongruenceClass *CC : reverse(CongruenceClasses)) { + for (auto *CC : reverse(CongruenceClasses)) { // Track the equivalent store info so we can decide whether to try // dead store elimination. SmallVector PossibleDeadStores; @@ -3185,6 +3429,22 @@ if (Def && Def->getType()->isVoidTy()) continue; + if (AllTempInstructions.count(dyn_cast_or_null(Def))) { + // If this is a value phi and all operands are real, insert it into + // the program and remove from temp instruction list. + auto *PHIE = + dyn_cast_or_null(ValueToExpression.lookup(Def)); + if (!PHIE || !PHIE->isAllRealOps()) + continue; + auto *DefInst = cast(Def); + AllTempInstructions.erase(DefInst); + auto *DefBlock = getBlockForValue(Def); + DEBUG(dbgs() << "Inserting fully real phi of ops" << *Def + << " into block " + << getBlockName(getBlockForValue(Def)) << "\n"); + DefInst->insertBefore(&DefBlock->front()); + } + if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n"); } else { @@ -3342,7 +3602,6 @@ } } } - return AnythingReplaced; } Index: test/Transforms/NewGVN/completeness.ll =================================================================== --- /dev/null +++ test/Transforms/NewGVN/completeness.ll @@ -0,0 +1,306 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @test1(i32, i8**) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] +; CHECK: br label [[TMP6:%.*]] +; CHECK: br label [[TMP6]] +; CHECK: [[TMP7:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ] +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ] +; CHECK-NEXT: ret i32 [[TMP7]] +; + %3 = icmp ne i32 %0, 0 + br i1 %3, label %4, label %5 + +;