Index: lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- lib/Analysis/BasicAliasAnalysis.cpp
+++ lib/Analysis/BasicAliasAnalysis.cpp
@@ -682,8 +682,11 @@
 #ifndef NDEBUG
 static const Function *getParent(const Value *V) {
-  if (const Instruction *inst = dyn_cast<Instruction>(V))
+  if (const Instruction *inst = dyn_cast<Instruction>(V)) {
+    if (!inst->getParent())
+      return nullptr;
     return inst->getParent()->getParent();
+  }
 
   if (const Argument *arg = dyn_cast<Argument>(V))
     return arg->getParent();
Index: lib/Transforms/Scalar/NewGVN.cpp
===================================================================
--- lib/Transforms/Scalar/NewGVN.cpp
+++ lib/Transforms/Scalar/NewGVN.cpp
@@ -34,6 +34,14 @@
 /// performing symbolic evaluation, forward propagation, and simplification of
 /// operations based on the value numbers deduced so far.
 ///
+/// In order to make the GVN mostly-complete, we use a technique derived from
+/// "Detection of Redundant Expressions: A Complete and Polynomial-time
+/// Algorithm in SSA" by R.R. Pai.  The source of incompleteness in most SSA
+/// based GVN algorithms is related to their inability to detect equivalence
+/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)).
+/// We resolve this issue by generating the equivalent "phi of ops" form for
+/// each op of phis we see, in a way that only takes polynomial time to resolve.
+///
 /// We also do not perform elimination by using any published algorithm.  All
 /// published algorithms are O(Instructions). Instead, we use a technique that
 /// is O(number of operations with the same value number), enabling us to skip
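To make the phi-of-ops / op-of-phis equivalence concrete, here is a small standalone C++ illustration (a sketch, not part of the patch; the ternary stands in for a two-predecessor CFG join). Both functions always compute the same value, which is exactly the equivalence the pass detects:

    #include <cassert>

    // Op of phis: the add consumes two values merged at the join.
    int opOfPhis(bool fromLeft, int a, int b, int c, int d) {
      int p = fromLeft ? a : c; // p = phi(a, c)
      int q = fromLeft ? b : d; // q = phi(b, d)
      return p + q;             // op of phis
    }

    // Phi of ops: the adds happen in the predecessors, the join merges results.
    int phiOfOps(bool fromLeft, int a, int b, int c, int d) {
      return fromLeft ? (a + b) : (c + d); // phi(a+b, c+d)
    }

    int main() {
      assert(opOfPhis(true, 1, 2, 3, 4) == phiOfOps(true, 1, 2, 3, 4));
      assert(opOfPhis(false, 1, 2, 3, 4) == phiOfOps(false, 1, 2, 3, 4));
    }
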
@@ -104,12 +112,11 @@
 STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
 STATISTIC(NumGVNAvoidedSortedLeaderChanges,
           "Number of avoided sorted leader changes");
-STATISTIC(NumGVNNotMostDominatingLeader,
-          "Number of times a member dominated it's new classes' leader");
 STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
 DEBUG_COUNTER(VNCounter, "newgvn-vn",
               "Controls which instructions are value numbered")
-
+DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi",
+              "Controls which instructions we create phi of ops for")
 // Currently store defining access refinement is too slow due to basicaa being
 // egregiously slow.  This flag lets us keep it working while we work on this
 // issue.
@@ -172,10 +179,9 @@
     }
   }
   // See if we really were the root of a component, by seeing if we still have
-  // our DFSNumber.
-  // If we do, we are the root of the component, and we have completed a
-  // component. If we do not,
-  // we are not the root of a component, and belong on the component stack.
+  // our DFSNumber.  If we do, we are the root of the component, and we have
+  // completed a component.  If we do not, we are not the root of a component,
+  // and belong on the component stack.
   if (Root.lookup(I) == OurDFS) {
     unsigned ComponentID = Components.size();
     Components.resize(Components.size() + 1);
@@ -425,6 +431,36 @@
   // Value Mappings.
   DenseMap<Value *, CongruenceClass *> ValueToClass;
   DenseMap<Value *, const Expression *> ValueToExpression;
+  // Value PHI handling, used to make equivalence between phi(op, op) and
+  // op(phi, phi).
+  // These mappings just store various data that would normally be part of the
+  // IR.
+
+  DenseSet<const Instruction *> PHINodeUses;
+  // Map a temporary instruction we created to a parent block.
+  DenseMap<const Value *, BasicBlock *> TempToBlock;
+  // Map between the temporary phis we created and the real instructions they
+  // are known equivalent to.
+  DenseMap<const Value *, PHINode *> RealToTemp;
+  DenseMap<const Value *, Instruction *> TempToReal;
+  // In order to know when we should re-process instructions that have
+  // phi-of-ops, we track the set of expressions that they needed as
+  // leaders.  When we discover new leaders for those expressions, we process
+  // the associated phi-of-op instructions again in case they have changed.  The
+  // other way they may change is if they had leaders, and those leaders
+  // disappear.  However, at the point they have leaders, there are uses of the
+  // relevant operands in the created phi node, and so they will get reprocessed
+  // through the normal user marking we perform.
+  DenseMap<Value *, SmallPtrSet<Value *, 2>> AdditionalUsers;
+  DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>>
+      ExpressionToPhiOfOps;
+
+  // Map from basic block to the temporary operations we created
+  DenseMap<const BasicBlock *, SmallVector<PHINode *, 2>> PHIOfOpsPHIs;
+  // Map from temporary operation to MemoryAccess.
+  DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory;
+  // Set of all temporary instructions we created.
+  DenseSet<Instruction *> AllTempInstructions;
 
   // Mapping from predicate info we used to the instructions we used it with.
   // In order to correctly ensure propagation, we must keep track of what
@@ -456,8 +492,8 @@
   enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
   DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
 
-  enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
-  DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
+  enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle };
+  DenseMap<const Instruction *, InstCycleState> InstCycleState;
   // Expression to class mapping.
   using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
   ExpressionClassMap ExpressionToClass;
@@ -514,7 +550,7 @@
   const Expression *createExpression(Instruction *);
   const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *);
   PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge,
-                                     bool &AllConstant);
+                                     bool &OriginalOpsConstant);
   const VariableExpression *createVariableExpression(Value *);
   const ConstantExpression *createConstantExpression(Constant *);
   const Expression *createVariableOrConstant(Value *V);
@@ -553,6 +589,9 @@
     return CClass;
   }
   void initializeCongruenceClasses(Function &F);
+  const Expression *makePossiblePhiOfOps(Instruction *, bool,
+                                         SmallPtrSetImpl<Value *> &);
+  void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
 
   // Value number an Instruction or MemoryPhi.
   void valueNumberMemoryPhi(MemoryPhi *);
@@ -561,7 +600,8 @@
   // Symbolic evaluation.
   const Expression *checkSimplificationResults(Expression *, Instruction *,
                                                Value *);
-  const Expression *performSymbolicEvaluation(Value *);
+  const Expression *performSymbolicEvaluation(Value *,
+                                              SmallPtrSetImpl<Value *> &);
   const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
                                                 Instruction *, MemoryAccess *);
   const Expression *performSymbolicLoadEvaluation(Instruction *);
@@ -585,7 +625,7 @@
   bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
   CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
   const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
-  bool isMemoryAccessTop(const MemoryAccess *) const;
+  bool isMemoryAccessTOP(const MemoryAccess *) const;
 
   // Ranking
   unsigned int getRank(const Value *) const;
@@ -609,6 +649,7 @@
   void replaceInstruction(Instruction *, Value *);
   void markInstructionForDeletion(Instruction *);
   void deleteInstructionsInBlock(BasicBlock *);
+  Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const;
 
   // New instruction creation.
   void handleNewInstruction(Instruction *){};
 
@@ -620,8 +661,10 @@
   void markPredicateUsersTouched(Instruction *);
   void markValueLeaderChangeTouched(CongruenceClass *CC);
   void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+  void markPhiOfOpsChanged(const Expression *E);
   void addPredicateUsers(const PredicateBase *, Instruction *);
   void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U);
+  void addAdditionalUsers(Value *To, Value *User);
 
   // Main loop of value numbering
   void iterateTouchedInstructions();
@@ -629,12 +672,16 @@
   // Utilities.
   void cleanupTables();
   std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
-  void updateProcessedCount(Value *V);
+  void updateProcessedCount(const Value *V);
   void verifyMemoryCongruency() const;
   void verifyIterationSettled(Function &F);
   bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
   BasicBlock *getBlockForValue(Value *V) const;
   void deleteExpression(const Expression *E);
+  MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+  MemoryAccess *getDefiningAccess(const MemoryAccess *) const;
+  MemoryPhi *getMemoryAccess(const BasicBlock *) const;
+  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
   unsigned InstrToDFSNum(const Value *V) const {
     assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
     return InstrDFS.lookup(V);
   }
@@ -654,8 +701,8 @@
                ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
                : InstrDFS.lookup(MA);
   }
-  bool isCycleFree(const PHINode *PN);
-  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+  bool isCycleFree(const Instruction *);
+  bool isBackedge(BasicBlock *From, BasicBlock *To) const;
   // Debug counter info.  When verifying, we have to reset the value numbering
   // debug counter to the same state it started in to get the same results.
   std::pair<int, int> StartingVNCounter;
@@ -683,20 +730,46 @@
   return true;
 }
 
+// Determine if the edge From->To is a backedge
+bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const {
+  if (From == To)
+    return true;
+  auto *FromDTN = DT->getNode(From);
+  auto *ToDTN = DT->getNode(To);
+  return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN);
+}
+
 #ifndef NDEBUG
 static std::string getBlockName(const BasicBlock *B) {
   return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
 }
 #endif
 
+// Get a MemoryAccess for an instruction, fake or real.
+MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const {
+  auto *Result = MSSA->getMemoryAccess(I);
+  return Result ? Result : TempToMemory.lookup(I);
+}
+
+// Get a MemoryPhi for a basic block. These are all real.
+MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const {
+  return MSSA->getMemoryAccess(BB);
+}
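The isBackedge helper above reduces backedge detection to comparing precomputed reverse-post-order numbers: a forward edge strictly increases the RPO index, so any edge that does not (including a self-loop) runs against the forward direction of the CFG. A minimal standalone sketch of the same test (hypothetical block ids and a plain map, rather than the pass's RPOOrdering over dominator-tree nodes):

    #include <unordered_map>

    struct RPOInfo {
      std::unordered_map<int, unsigned> RPONumber; // block id -> RPO index

      bool isBackedge(int From, int To) const {
        if (From == To)
          return true; // a self-loop is trivially a backedge
        // Forward edges go to a strictly larger RPO number.
        return RPONumber.at(From) >= RPONumber.at(To);
      }
    };

    int main() {
      // entry(0) -> header(1) -> latch(2), plus latch(2) -> header(1).
      RPOInfo Info{{{0, 0}, {1, 1}, {2, 2}}};
      return Info.isBackedge(2, 1) && !Info.isBackedge(1, 2) ? 0 : 1;
    }
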
 // Get the basic block from an instruction/memory value.
 BasicBlock *NewGVN::getBlockForValue(Value *V) const {
-  if (auto *I = dyn_cast<Instruction>(V))
-    return I->getParent();
-  else if (auto *MP = dyn_cast<MemoryPhi>(V))
-    return MP->getBlock();
-  llvm_unreachable("Should have been able to figure out a block for our value");
-  return nullptr;
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    auto *Parent = I->getParent();
+    if (Parent)
+      return Parent;
+    Parent = TempToBlock.lookup(V);
+    assert(Parent && "Every fake instruction should have a block");
+    return Parent;
+  }
+
+  auto *MP = dyn_cast<MemoryPhi>(V);
+  assert(MP && "Should have been an instruction or a MemoryPhi");
+  return MP->getBlock();
 }
 
 // Delete a definitely dead expression, so it can be reused by the expression
@@ -708,10 +781,9 @@
   const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
   ExpressionAllocator.Deallocate(E);
 }
-
 PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
-                                           bool &AllConstant) {
-  BasicBlock *PHIBlock = I->getParent();
+                                           bool &OriginalOpsConstant) {
+  BasicBlock *PHIBlock = getBlockForValue(I);
   auto *PN = cast<PHINode>(I);
   auto *E =
       new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);
@@ -720,24 +792,22 @@
   E->setType(I->getType());
   E->setOpcode(I->getOpcode());
 
-  unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
-
   // Filter out unreachable phi operands.
   auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) {
     return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock});
   });
-
   std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
                  [&](const Use &U) -> Value * {
                    auto *BB = PN->getIncomingBlock(U);
-                   auto *DTN = DT->getNode(BB);
-                   if (RPOOrdering.lookup(DTN) >= PHIRPO)
-                     HasBackedge = true;
-                   AllConstant &= isa<UndefValue>(U) || isa<Constant>(U);
+                   HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
+                   OriginalOpsConstant =
+                       OriginalOpsConstant && isa<Constant>(U);
                    // Don't try to transform self-defined phis.
-                   if (U == PN)
+                   if (U == PN) {
                      return PN;
+                   }
+                   return lookupOperandLeader(U);
                  });
   return E;
@@ -758,7 +828,7 @@
   // whether all members are constant.
   std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
     auto Operand = lookupOperandLeader(O);
-    AllConstant &= isa<Constant>(Operand);
+    AllConstant = AllConstant && isa<Constant>(Operand);
     return Operand;
   });
 
@@ -1026,7 +1096,7 @@
 // Return true if the MemoryAccess is really equivalent to everything. This is
 // equivalent to the lattice value "TOP" in most lattices.  This is the initial
 // state of all MemoryAccesses.
-bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const {
   return getMemoryClass(MA) == TOPClass;
 }
 
@@ -1072,7 +1142,7 @@
   // Unlike loads, we never try to eliminate stores, so we do not check if they
   // are simple and avoid value numbering them.
   auto *SI = cast<StoreInst>(I);
-  auto *StoreAccess = MSSA->getMemoryAccess(SI);
+  auto *StoreAccess = getMemoryAccess(SI);
   // Get the expression, if any, for the RHS of the MemoryDef.
   const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
   if (EnableStoreRefinement)
@@ -1109,7 +1179,7 @@
       dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
     if ((lookupOperandLeader(LI->getPointerOperand()) ==
          lookupOperandLeader(SI->getPointerOperand())) &&
-        (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) ==
+        (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) ==
          StoreRHS))
       return createVariableExpression(LI);
   }
@@ -1213,8 +1283,9 @@
   // Load of undef is undef.
   if (isa<UndefValue>(LoadAddressLeader))
     return createConstantExpression(UndefValue::get(LI->getType()));
-
-  MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+  MemoryAccess *OriginalAccess = getMemoryAccess(I);
+  MemoryAccess *DefiningAccess =
+      MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
 
   if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
     if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
@@ -1403,34 +1474,37 @@
   return Changed;
 }
 
-// Determine if a phi is cycle-free.  That means the values in the phi don't
-// depend on any expressions that can change value as a result of the phi.
-// For example, a non-cycle free phi would be v = phi(0, v+1).
-bool NewGVN::isCycleFree(const PHINode *PN) {
-  // In order to compute cycle-freeness, we do SCC finding on the phi, and see
+// Determine if an instruction is cycle-free.  That means the values in the
+// instruction don't depend on any expressions that can change value as a
+// result of the instruction.  For example, a non-cycle-free instruction would
+// be v = phi(0, v+1).
+bool NewGVN::isCycleFree(const Instruction *I) {
+  // In order to compute cycle-freeness, we do SCC finding on the instruction, and see
   // what kind of SCC it ends up in.  If it is a singleton, it is cycle-free.
   // If it is not in a singleton, it is only cycle free if the other members are
   // all phi nodes (as they do not compute anything, they are copies). TODO:
   // There are likely a few other intrinsics or expressions that could be
   // included here, but this happens so infrequently already that it is not
   // likely to be worth it.
-  auto PCS = PhiCycleState.lookup(PN);
-  if (PCS == PCS_Unknown) {
-    SCCFinder.Start(PN);
-    auto &SCC = SCCFinder.getComponentFor(PN);
+  auto ICS = InstCycleState.lookup(I);
+  if (ICS == ICS_Unknown) {
+    SCCFinder.Start(I);
+    auto &SCC = SCCFinder.getComponentFor(I);
    // It's cycle-free if its size is 1 or the SCC is *only* phi nodes.
     if (SCC.size() == 1)
-      PhiCycleState.insert({PN, PCS_CycleFree});
+      InstCycleState.insert({I, ICS_CycleFree});
     else {
       bool AllPhis =
           llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
-      PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+      ICS = AllPhis ? ICS_CycleFree : ICS_Cycle;
       for (auto *Member : SCC)
         if (auto *MemberPhi = dyn_cast<PHINode>(Member))
-          PhiCycleState.insert({MemberPhi, PCS});
+          InstCycleState.insert({MemberPhi, ICS});
     }
   }
-  if (PCS == PCS_Cycle)
+  if (ICS == ICS_Cycle)
     return false;
   return true;
 }
@@ -1495,8 +1569,18 @@
   // constants, or all operands are ignored but the undef, it also must be
   // cycle free.
   if (!AllConstant && HasBackedge && NumOps > 0 &&
-      !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
-    return E;
+      !isa<UndefValue>(AllSameValue)) {
+    if (!isCycleFree(I))
+      return E;
+#if 0
+    // Note: For a phi of ops, it can only be cycle free if it is cycle
+    // free, *and* the instruction it represents is cycle free, since we
+    // have implicit copies due to assigning the same congruence class.
+    if (AllTempInstructions.count(I) &&
+        !isCycleFree(cast<Instruction>(TempToReal.lookup(I))))
+      return E;
+#endif
+  }
 
   // Only have to check for instructions
   if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
@@ -1665,8 +1749,17 @@
     return createExpression(I);
 }
 
+// Return true if V is a value that will always be available (IE can
+// be placed anywhere) in the function.  We don't do globals here
+// because they are often worse to put in place.
+// TODO: Separate cost from availability
+static bool alwaysAvailable(Value *V) {
+  return isa<Constant>(V) || isa<Argument>(V);
+}
+
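For intuition on the SCC test in isCycleFree above: in v = phi(0, v+1), the phi and the add form a two-node SCC that contains a non-phi, so v is cyclic; a value whose SCC is a singleton, or consists only of phis (pure copies), is cycle-free. A toy version of just the classification rule, over an SCC assumed already computed (a sketch, not the pass's SCCFinder):

    #include <vector>

    enum class CycleState { CycleFree, Cyclic };

    struct Node {
      bool IsPhi;
    };

    // Singleton SCCs are trivially cycle-free; larger ones are cycle-free only
    // if every member is a phi, since phis compute nothing (they are copies).
    CycleState classifySCC(const std::vector<Node> &SCC) {
      if (SCC.size() == 1)
        return CycleState::CycleFree;
      for (const Node &N : SCC)
        if (!N.IsPhi)
          return CycleState::Cyclic;
      return CycleState::CycleFree;
    }

    int main() {
      // v = phi(0, v+1): SCC = {phi, add} -> cyclic.
      std::vector<Node> PhiPlusAdd = {{true}, {false}};
      return classifySCC(PhiPlusAdd) == CycleState::Cyclic ? 0 : 1;
    }
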
 // Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V) {
+const Expression *
+NewGVN::performSymbolicEvaluation(Value *V, SmallPtrSetImpl<Value *> &Visited) {
   const Expression *E = nullptr;
   if (auto *C = dyn_cast<Constant>(V))
     E = createConstantExpression(C);
@@ -1740,16 +1833,34 @@
     default:
       return nullptr;
     }
+
+    if (E && !isa<ConstantExpression>(E) && !isa<VariableExpression>(E) &&
+        PHINodeUses.count(I)) {
+      // FIXME: Backedge argument
+      auto *PHIE = makePossiblePhiOfOps(I, false, Visited);
+      if (PHIE)
+        return PHIE;
+    }
   }
   return E;
 }
 
+void NewGVN::addAdditionalUsers(Value *To, Value *User) {
+  AdditionalUsers[To].insert(User);
+}
+
 void NewGVN::markUsersTouched(Value *V) {
   // Now mark the users as touched.
   for (auto *User : V->users()) {
     assert(isa<Instruction>(User) && "Use of value not within an instruction?");
     TouchedInstructions.set(InstrToDFSNum(User));
   }
+  const auto Result = AdditionalUsers.find(V);
+  if (Result != AdditionalUsers.end()) {
+    for (auto *User : Result->second)
+      TouchedInstructions.set(InstrToDFSNum(User));
+    AdditionalUsers.erase(Result);
+  }
 }
 
 void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) {
@@ -1776,6 +1887,10 @@
 
 // Add I to the set of users of a given predicate.
 void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) {
+  // This is a slight hack. When doing phi of ops, we set the instruction
+  // numbers to be the right one, and then this ensures we don't add any deleted
+  // operands to the predicate user list.
+  I = cast<Instruction>(InstrFromDFSNum(InstrToDFSNum(I)));
   if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
     PredicateToUsers[PBranch->Condition].insert(I);
   else if (auto *PAssume = dyn_cast<PredicateAssume>(PB))
@@ -1832,11 +1947,11 @@
          "Can't get next leader if there is none");
   if (CC->getStoreCount() > 0) {
     if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
-      return MSSA->getMemoryAccess(NL);
+      return getMemoryAccess(NL);
     // Find the store with the minimum DFS number.
     auto *V = getMinDFSOfRange<Value>(make_filter_range(
         *CC, [&](const Value *V) { return isa<StoreInst>(V); }));
-    return MSSA->getMemoryAccess(cast<StoreInst>(V));
+    return getMemoryAccess(cast<StoreInst>(V));
   }
   assert(CC->getStoreCount() == 0);
@@ -1920,32 +2035,11 @@
                                            CongruenceClass *NewClass) {
   if (I == OldClass->getNextLeader().first)
     OldClass->resetNextLeader();
-
-  // It's possible, though unlikely, for us to discover equivalences such
-  // that the current leader does not dominate the old one.
-  // This statistic tracks how often this happens.
-  // We assert on phi nodes when this happens, currently, for debugging, because
-  // we want to make sure we name phi node cycles properly.
-  if (isa<Instruction>(NewClass->getLeader()) && NewClass->getLeader() &&
-      I != NewClass->getLeader()) {
-    auto *IBB = I->getParent();
-    auto *NCBB = cast<Instruction>(NewClass->getLeader())->getParent();
-    bool Dominated =
-        IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader());
-    Dominated = Dominated || DT->properlyDominates(IBB, NCBB);
-    if (Dominated) {
-      ++NumGVNNotMostDominatingLeader;
-      assert(
-          !isa<PHINode>(I) &&
-          "New class for instruction should not be dominated by instruction");
-    }
-  }
-
+  OldClass->erase(I);
+  NewClass->insert(I);
   if (NewClass->getLeader() != I)
     NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)});
-  OldClass->erase(I);
-  NewClass->insert(I);
 
   // Handle our special casing of stores.
   if (auto *SI = dyn_cast<StoreInst>(I)) {
     OldClass->decStoreCount();
@@ -1980,7 +2074,7 @@
   // instructions before.
   // If it's not a memory use, set the MemoryAccess equivalence
-  auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
+  auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I));
   bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA;
   if (InstMA)
     moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
@@ -2038,20 +2132,33 @@
   }
 }
 
+// For a given expression, mark the phi of ops instructions that could have
+// changed as a result.
+void NewGVN::markPhiOfOpsChanged(const Expression *E) {
+  auto PhiOfOpsSet = ExpressionToPhiOfOps.find(E);
+  if (PhiOfOpsSet != ExpressionToPhiOfOps.end()) {
+    for (auto I : PhiOfOpsSet->second)
+      TouchedInstructions.set(InstrToDFSNum(I));
+    ExpressionToPhiOfOps.erase(PhiOfOpsSet);
+  }
+}
+
 // Perform congruence finding on a given value numbering expression.
 void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
   ValueToExpression[I] = E;
+
+  assert(!TempToReal.lookup(I) &&
+         "Should not congruence find fake instructions");
   // This is guaranteed to return something, since it will at least find
   // TOP.
-
   CongruenceClass *IClass = ValueToClass[I];
   assert(IClass && "Should have found a IClass");
   // Dead classes should have been eliminated from the mapping.
   assert(!IClass->isDead() && "Found a dead class");
 
-  CongruenceClass *EClass;
+  CongruenceClass *EClass = nullptr;
   if (const auto *VE = dyn_cast<VariableExpression>(E)) {
-    EClass = ValueToClass[VE->getVariableValue()];
+    EClass = ValueToClass.lookup(VE->getVariableValue());
   } else {
     auto lookupResult = ExpressionToClass.insert({E, nullptr});
@@ -2102,10 +2209,13 @@
   if (ClassChanged || LeaderChanged) {
     DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E
                  << "\n");
-    if (ClassChanged)
+    if (ClassChanged) {
       moveValueToNewCongruenceClass(I, E, IClass, EClass);
+      markPhiOfOpsChanged(E);
+    }
+
     markUsersTouched(I);
-    if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
+    if (MemoryAccess *MA = getMemoryAccess(I))
       markMemoryUsersTouched(MA);
     if (auto *CI = dyn_cast<CmpInst>(I))
       markPredicateUsersTouched(CI);
@@ -2131,7 +2241,7 @@
     // impact predicates. Otherwise, only mark the phi nodes as touched, as
     // they are the only thing that depend on new edges. Anything using their
     // values will get propagated to if necessary.
-    if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
+    if (MemoryAccess *MemPhi = getMemoryAccess(To))
       TouchedInstructions.set(InstrToDFSNum(MemPhi));
 
     auto BI = To->begin();
@@ -2139,6 +2249,12 @@
       TouchedInstructions.set(InstrToDFSNum(&*BI));
       ++BI;
     }
+    const auto PHIResult = PHIOfOpsPHIs.find(To);
+    if (PHIResult != PHIOfOpsPHIs.end()) {
+      const auto &PHIs = PHIResult->second;
+      for (auto I : PHIs)
+        TouchedInstructions.set(InstrToDFSNum(I));
+    }
   }
 }
 
@@ -2228,7 +2344,7 @@
   // This also may be a memory defining terminator, in which case, set it
   // equivalent only to itself.
   //
-  auto *MA = MSSA->getMemoryAccess(TI);
+  auto *MA = getMemoryAccess(TI);
   if (MA && !isa<MemoryUse>(MA)) {
     auto *CC = ensureLeaderOfMemoryClass(MA);
     if (setMemoryClass(MA, CC))
      markMemoryUsersTouched(MA);
   }
 }
 
+void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
+                         Instruction *ExistingValue) {
+  InstrDFS[Op] = InstrToDFSNum(ExistingValue);
+  // InstrDFS[Op] = MaxDFSNum++;
+  // DFSToInstr.push_back(Op);
+  AllTempInstructions.insert(Op);
+  PHIOfOpsPHIs[BB].push_back(Op);
+  TempToBlock[Op] = BB;
+  if (ExistingValue) {
+    RealToTemp[ExistingValue] = Op;
+    TempToReal[Op] = ExistingValue;
+  }
+}
+
+static bool okayForPHIOfOps(const Instruction *I) {
+  return isa(I) || isa(I) || isa(I);
+}
+
+// When we see an instruction that is an op of phis, generate the equivalent phi
+// of ops form.
+const Expression *
+NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+                             SmallPtrSetImpl<Value *> &Visited) {
+  if (!okayForPHIOfOps(I))
+    return nullptr;
+
+  if (!Visited.insert(I).second)
+    return nullptr;
+  // For now, we require the instruction be cycle free because we don't
+  // *always* create a phi of ops for instructions that could be done as phi
+  // of ops, we only do it if we think it is useful.  If we did do it all the
+  // time, we could remove the cycle free check.
+  if (!isCycleFree(I))
+    return nullptr;
+
+  unsigned IDFSNum = InstrToDFSNum(I);
+  // Pretty much all of the instructions we can convert to phi of ops over a
+  // backedge that are adds, are really induction variables, and those are
+  // pretty much pointless to convert.  This is very coarse-grained for a
+  // test, so if we do find some value, we can change it later.
+  // But otherwise, what can happen is we convert the induction variable from
+  //
+  // i = phi (0, tmp)
+  // tmp = i + 1
+  //
+  // to
+  // i = phi (0, tmpphi)
+  // tmpphi = phi(1, tmpphi+1)
+  //
+  // Which we don't want to happen.  We could just avoid this for all non-cycle
+  // free phis, and we may go that route.
+  if (HasBackedge && I->getOpcode() == Instruction::Add)
+    return nullptr;
+
+  SmallPtrSet ProcessedPHIs;
+  // TODO: We don't do phi translation on memory accesses because it's
+  // complicated. For a load, we'd need to be able to simulate a new memoryuse,
+  // which we don't have a good way of doing ATM.
+  auto *MemAccess = getMemoryAccess(I);
+  // If the memory operation is defined by a memory operation in this block
+  // that isn't a MemoryPhi, transforming the pointer backwards through a
+  // scalar phi can't help, as it would still be killed by that memory
+  // operation.
+  if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) &&
+      MemAccess->getDefiningAccess()->getBlock() == I->getParent())
+    return nullptr;
+
+  // Convert op of phis to phi of ops
+  for (auto &Op : I->operands()) {
+    if (!isa<PHINode>(Op))
+      continue;
+    auto *OpPHI = cast<PHINode>(Op);
+    // No point in doing this for one-operand phis.
+    if (OpPHI->getNumOperands() == 1)
+      continue;
+    if (!DebugCounter::shouldExecute(PHIOfOpsCounter))
+      return nullptr;
+    SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops;
+    auto *PHIBlock = getBlockForValue(OpPHI);
+    for (auto PredBB : OpPHI->blocks()) {
+      Value *FoundVal = nullptr;
+      // We could just skip unreachable edges entirely but it's tricky to do
+      // with existing phi nodes.
+      if (ReachableEdges.count({PredBB, PHIBlock})) {
+        Instruction *ValueOp = I->clone();
+        auto Iter = TempToMemory.end();
+        if (MemAccess)
+          Iter = TempToMemory.insert({ValueOp, MemAccess}).first;
+
+        for (auto &Op : ValueOp->operands()) {
+          Op = Op->DoPHITranslation(PHIBlock, PredBB);
+          addAdditionalUsers(Op, I);
+        }
+        InstrDFS.insert({ValueOp, IDFSNum});
+        const Expression *E = performSymbolicEvaluation(ValueOp, Visited);
+        InstrDFS.erase(ValueOp);
+        delete ValueOp;
+
+        if (MemAccess)
+          TempToMemory.erase(Iter);
+        if (!E)
+          return nullptr;
+        if (!isa(E))
+          ExpressionToPhiOfOps[E].insert(I);
+        FoundVal = findPhiOfOpsLeader(E, PredBB);
+        if (!FoundVal)
+          return nullptr;
+      } else {
+        DEBUG(dbgs() << "Skipping phi of ops operand for incoming block "
+                     << getBlockName(PredBB)
+                     << " because the block is unreachable\n");
+        FoundVal = UndefValue::get(I->getType());
+      }
+
+      Ops.push_back({FoundVal, PredBB});
+      DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in "
+                   << getBlockName(PredBB) << "\n");
+    }
+    auto *ValuePHI = RealToTemp.lookup(I);
+    bool NewPHI = false;
+    if (!ValuePHI) {
+      ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands());
+      addPhiOfOps(ValuePHI, PHIBlock, I);
+      NewPHI = true;
+    }
+    if (NewPHI) {
+      for (auto PHIOp : Ops)
+        ValuePHI->addIncoming(PHIOp.first, PHIOp.second);
+    } else {
+      unsigned int i = 0;
+      for (auto PHIOp : Ops) {
+        ValuePHI->setIncomingValue(i, PHIOp.first);
+        ValuePHI->setIncomingBlock(i, PHIOp.second);
+        ++i;
+      }
+    }
+
+    DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I
+                 << "\n");
+    return performSymbolicEvaluation(ValuePHI, Visited);
+  }
+  return nullptr;
+}
+
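The per-predecessor loop above clones the instruction and rewrites each operand with DoPHITranslation, so a use of p = phi(a, c) becomes a on one incoming edge and c on the other; symbolic evaluation of the translated clone then asks whether each predecessor already has a leader for the resulting expression. A schematic of the translation step alone (hypothetical flat tables instead of LLVM's use lists):

    #include <map>
    #include <utility>
    #include <vector>

    using ValueId = int;
    using BlockId = int;

    // Incoming values of each phi: {phi, predecessor} -> incoming value.
    using PhiTable = std::map<std::pair<ValueId, BlockId>, ValueId>;

    // Translate one operand through the phis of the join block: if V is a phi,
    // use its incoming value for Pred; otherwise V is unchanged.
    ValueId phiTranslate(ValueId V, BlockId Pred, const PhiTable &Phis) {
      auto It = Phis.find({V, Pred});
      return It == Phis.end() ? V : It->second;
    }

    // Translate all operands of an op, yielding the per-predecessor operation
    // whose value is then looked up (e.g. p + q becomes a + b in one pred).
    std::vector<ValueId> translateOperands(const std::vector<ValueId> &Ops,
                                           BlockId Pred, const PhiTable &Phis) {
      std::vector<ValueId> Out;
      for (ValueId Op : Ops)
        Out.push_back(phiTranslate(Op, Pred, Phis));
      return Out;
    }

    int main() {
      // p(10) = phi(a=1, c=3), q(11) = phi(b=2, d=4).
      PhiTable Phis = {{{10, 0}, 1}, {{10, 1}, 3}, {{11, 0}, 2}, {{11, 1}, 4}};
      auto InPred0 = translateOperands({10, 11}, 0, Phis); // {a, b}
      return InPred0[0] == 1 && InPred0[1] == 2 ? 0 : 1;
    }
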
 // The algorithm initially places the values of the routine in the TOP
 // congruence class. The leader of TOP is the undetermined value `undef`.
 // When the algorithm has finished, values still in TOP are unreachable.
@@ -2279,6 +2541,12 @@
       TOPClass->incStoreCount();
   }
   for (auto &I : *BB) {
+    // TODO: Move to helper
+    if (isa<PHINode>(&I))
+      for (auto *U : I.users())
+        if (auto *UInst = dyn_cast<Instruction>(U))
+          if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst))
+            PHINodeUses.insert(UInst);
     // Don't insert void terminators into the class. We don't value number
     // them, and they just end up sitting in TOP.
     if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
@@ -2303,12 +2571,36 @@
       CongruenceClasses[i] = nullptr;
   }
 
+  // Destroy the value expressions
+  SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(),
+                                         AllTempInstructions.end());
+  AllTempInstructions.clear();
+
+  // We have to drop all references for everything first, so there are no uses
+  // left as we delete them.
+  for (auto *I : TempInst) {
+    I->dropAllReferences();
+  }
+
+  while (!TempInst.empty()) {
+    auto *I = TempInst.back();
+    TempInst.pop_back();
+    delete I;
+  }
+
   ValueToClass.clear();
   ArgRecycler.clear(ExpressionAllocator);
   ExpressionAllocator.Reset();
   CongruenceClasses.clear();
   ExpressionToClass.clear();
   ValueToExpression.clear();
+  RealToTemp.clear();
+  TempToReal.clear();
+  AdditionalUsers.clear();
+  ExpressionToPhiOfOps.clear();
+  TempToBlock.clear();
+  TempToMemory.clear();
+  PHIOfOpsPHIs.clear();
   ReachableBlocks.clear();
   ReachableEdges.clear();
 #ifndef NDEBUG
@@ -2324,14 +2616,17 @@
   MemoryToUsers.clear();
 }
 
+// Assign local DFS number mapping to instructions, and leave space for Value
+// PHI's.
 std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
                                                        unsigned Start) {
   unsigned End = Start;
-  if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) {
+  if (MemoryAccess *MemPhi = getMemoryAccess(B)) {
     InstrDFS[MemPhi] = End++;
     DFSToInstr.emplace_back(MemPhi);
   }
 
+  // Then the real block goes next.
   for (auto &I : *B) {
     // There's no need to call isInstructionTriviallyDead more than once on
     // an instruction. Therefore, once we know that an instruction is dead
@@ -2342,7 +2637,6 @@
       markInstructionForDeletion(&I);
       continue;
     }
-
     InstrDFS[&I] = End++;
     DFSToInstr.emplace_back(&I);
   }
@@ -2353,7 +2647,7 @@
   return std::make_pair(Start, End);
 }
 
-void NewGVN::updateProcessedCount(Value *V) {
+void NewGVN::updateProcessedCount(const Value *V) {
 #ifndef NDEBUG
   if (ProcessedCount.count(V) == 0) {
     ProcessedCount.insert({V, 1});
@@ -2372,7 +2666,7 @@
   const BasicBlock *PHIBlock = MP->getBlock();
   auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
     return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP &&
-           !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+           !isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
           ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
   });
   // If all that is left is nothing, our memoryphi is undef. We keep it as
@@ -2425,18 +2719,17 @@
     DEBUG(dbgs() << "Processing instruction " << *I << "\n");
     if (!I->isTerminator()) {
       const Expression *Symbolized = nullptr;
+      SmallPtrSet<Value *, 2> Visited;
      if (DebugCounter::shouldExecute(VNCounter)) {
-        Symbolized = performSymbolicEvaluation(I);
+        Symbolized = performSymbolicEvaluation(I, Visited);
       } else {
         // Mark the instruction as unused so we don't value number it again.
         InstrDFS[I] = 0;
       }
       // If we couldn't come up with a symbolic expression, use the unknown
       // expression
-      if (Symbolized == nullptr) {
+      if (Symbolized == nullptr)
         Symbolized = createUnknownExpression(I);
-      }
-
       performCongruenceFinding(I, Symbolized);
     } else {
       // Handle terminators that return values. All of them produce values we
@@ -2527,13 +2820,15 @@
         return false;
       if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first))
        return !isInstructionTriviallyDead(MemDef->getMemoryInst());
+      if (auto *MemPhi = dyn_cast<MemoryPhi>(Pair.first))
+        return all_of(MemPhi->operands(), [&](const Value *V) {
+          return isMemoryAccessTOP(cast<MemoryAccess>(V));
+        });
       return true;
     };
 
   auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred);
   for (auto KV : Filtered) {
-    assert(KV.second != TOPClass &&
-           "Memory not unreachable but ended up in TOP");
     if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
       auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader());
       if (FirstMUD && SecondMUD)
@@ -2627,7 +2922,7 @@
   // Nothing set, nothing to iterate, just return.
   if (FirstInstr == -1)
     return;
-  BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
+  const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
   while (TouchedInstructions.any()) {
     ++Iterations;
     // Walk through all the instructions in all the blocks in RPO.
@@ -2645,7 +2940,7 @@
       }
 
       Value *V = InstrFromDFSNum(InstrNum);
-      BasicBlock *CurrBlock = getBlockForValue(V);
+      const BasicBlock *CurrBlock = getBlockForValue(V);
 
       // If we hit a new block, do reachability processing.
       if (CurrBlock != LastBlock) {
@@ -2726,6 +3021,7 @@
     BlockInstRange.insert({B, BlockRange});
     ICount += BlockRange.second - BlockRange.first;
   }
+  initializeCongruenceClasses(F);
 
   TouchedInstructions.resize(ICount);
   // Ensure we don't end up resizing the expressionToClass map, as
   // that can be quite expensive. At most, we have one expression per
   // instruction.
   ExpressionToClass.reserve(ICount * 2);
 
   // Initialize the touched instructions to include the entry block.
   const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock());
   TouchedInstructions.set(InstRange.first, InstRange.second);
+  DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock())
+               << " marked reachable\n");
   ReachableBlocks.insert(&F.getEntryBlock());
-  initializeCongruenceClasses(F);
 
   iterateTouchedInstructions();
   verifyMemoryCongruency();
   verifyIterationSettled(F);
@@ -2750,7 +3047,8 @@
     if (!ToErase->use_empty())
       ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
 
-    ToErase->eraseFromParent();
+    if (ToErase->getParent())
+      ToErase->eraseFromParent();
   }
 
   // Delete all unreachable blocks.
@@ -2769,14 +3067,6 @@
   return Changed;
 }
 
-// Return true if V is a value that will always be available (IE can
-// be placed anywhere) in the function. We don't do globals here
-// because they are often worse to put in place.
-// TODO: Separate cost from availability
-static bool alwaysAvailable(Value *V) {
-  return isa<Constant>(V) || isa<Argument>(V);
-}
-
 struct NewGVN::ValueDFS {
   int DFSIn = 0;
   int DFSOut = 0;
@@ -2866,9 +3156,21 @@
   }
   assert(isa<Instruction>(D) &&
          "The dense set member should always be an instruction");
-  VDDef.LocalNum = InstrToDFSNum(D);
-  DFSOrderedSet.emplace_back(VDDef);
   Instruction *Def = cast<Instruction>(D);
+  VDDef.LocalNum = InstrToDFSNum(D);
+  DFSOrderedSet.push_back(VDDef);
+  // If there is a phi node equivalent, add it
+  if (auto *PN = RealToTemp.lookup(Def)) {
+    auto *PHIE =
+        dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def));
+    if (PHIE) {
+      VDDef.Def.setInt(false);
+      VDDef.Def.setPointer(PN);
+      VDDef.LocalNum = 0;
+      DFSOrderedSet.push_back(VDDef);
+    }
+  }
+
   unsigned int UseCount = 0;
   // Now add the uses.
   for (auto &U : Def->uses()) {
@@ -2885,7 +3187,7 @@
       // they are from.
       VDUse.LocalNum = InstrDFS.size() + 1;
     } else {
-      IBlock = I->getParent();
+      IBlock = getBlockForValue(I);
       VDUse.LocalNum = InstrToDFSNum(I);
     }
@@ -3055,6 +3357,37 @@
 };
 }
 
+// Given a value and a basic block we are trying to see if it is available in,
+// see if the value has a leader available in that block.
+Value *NewGVN::findPhiOfOpsLeader(const Expression *E,
+                                  const BasicBlock *BB) const {
+  // It would already be constant if we could make it constant
+  if (auto *CE = dyn_cast<ConstantExpression>(E))
+    return CE->getConstantValue();
+  if (auto *VE = dyn_cast<VariableExpression>(E))
+    return VE->getVariableValue();
+
+  auto *CC = ExpressionToClass.lookup(E);
+  if (!CC)
+    return nullptr;
+  if (alwaysAvailable(CC->getLeader()))
+    return CC->getLeader();
+
+  for (auto Member : *CC) {
+    auto *MemberInst = dyn_cast<Instruction>(Member);
+    // Anything that isn't an instruction is always available.
+    if (!MemberInst)
+      return Member;
+    // If we are looking for something in the same block as the member, it must
+    // be a leader because this function is looking for operands for a phi node.
+    if (MemberInst->getParent() == BB ||
+        DT->dominates(MemberInst->getParent(), BB)) {
+      return Member;
+    }
+  }
+  return nullptr;
+}
+
 bool NewGVN::eliminateInstructions(Function &F) {
   // This is a non-standard eliminator. The normal way to eliminate is
   // to walk the dominator tree in order, keeping track of available
@@ -3084,26 +3417,45 @@
   // Since we are going to walk the domtree anyway, and we can't guarantee the
   // DFS numbers are updated, we compute some ourselves.
   DT->updateDFSNumbers();
-
-  for (auto &B : F) {
-    if (!ReachableBlocks.count(&B)) {
-      for (const auto S : successors(&B)) {
-        for (auto II = S->begin(); isa<PHINode>(II); ++II) {
-          auto &Phi = cast<PHINode>(*II);
-          DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block "
-                       << getBlockName(&B)
-                       << " with undef due to it being unreachable\n");
-          for (auto &Operand : Phi.incoming_values())
-            if (Phi.getIncomingBlock(Operand) == &B)
-              Operand.set(UndefValue::get(Phi.getType()));
-        }
-      }
-    }
-  }
+  auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) {
+    for (auto &Operand : PHI.incoming_values())
+      if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) {
+        DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block "
+                     << getBlockName(PHI.getIncomingBlock(Operand))
+                     << " with undef due to it being unreachable\n");
+        Operand.set(UndefValue::get(PHI.getType()));
+      }
+  };
+  SmallPtrSet<BasicBlock *, 8> BlocksWithPhis;
+  for (auto &B : F)
+    if ((!B.empty() && isa<PHINode>(*B.begin())) ||
+        (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end()))
+      BlocksWithPhis.insert(&B);
+  DenseMap<const BasicBlock *, unsigned int> ReachablePredCount;
+  for (auto KV : ReachableEdges)
+    ReachablePredCount[KV.getEnd()]++;
+  for (auto *BB : BlocksWithPhis)
+    // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in
+    // the block and subtract the pred count, but it's more complicated.
+    if (ReachablePredCount.lookup(BB) !=
+        std::distance(pred_begin(BB), pred_end(BB))) {
+      for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
+        auto &PHI = cast<PHINode>(*II);
+        ReplaceUnreachablePHIArgs(PHI, BB);
+      }
+      auto PHIResult = PHIOfOpsPHIs.find(BB);
+      if (PHIResult != PHIOfOpsPHIs.end()) {
+        auto &PHIs = PHIResult->second;
+        for (auto I : PHIs) {
+          auto *PHI = dyn_cast<PHINode>(I);
+          ReplaceUnreachablePHIArgs(*PHI, BB);
+        }
+      }
+    }
 
   // Map to store the use counts
   DenseMap<const Value *, unsigned int> UseCounts;
-  for (CongruenceClass *CC : reverse(CongruenceClasses)) {
+  for (auto *CC : reverse(CongruenceClasses)) {
     // Track the equivalent store info so we can decide whether to try
     // dead store elimination.
     SmallVector<ValueDFS, 8> PossibleDeadStores;
@@ -3151,7 +3503,7 @@
     DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
                  << "\n");
     // If this is a singleton, we can skip it.
-    if (CC->size() != 1) {
+    if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
       // This is a stack because equality replacement/etc may place
       // constants in the middle of the member list, and we want to use
      // those constant values in preference to the current leader, over
@@ -3173,6 +3525,21 @@
         // We ignore void things because we can't get a value from them.
        if (Def && Def->getType()->isVoidTy())
           continue;
+        auto *DefInst = dyn_cast_or_null<Instruction>(Def);
+        if (DefInst && AllTempInstructions.count(DefInst)) {
+          auto *PN = cast<PHINode>(DefInst);
+
+          // If this is a value phi and that's the expression we used, insert
+          // it into the program and remove it from the temp instruction list.
+          AllTempInstructions.erase(PN);
+          auto *DefBlock = getBlockForValue(Def);
+          DEBUG(dbgs() << "Inserting fully real phi of ops " << *Def
+                       << " into block "
+                       << getBlockName(getBlockForValue(Def)) << "\n");
+          PN->insertBefore(&DefBlock->front());
+          Def = PN;
+        }
 
         if (EliminationStack.empty()) {
           DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -3331,7 +3698,6 @@
       }
     }
   }
-
   return AnythingReplaced;
 }
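The eliminator above leans on one fact about dominator-tree DFS numbers: A dominates B exactly when A's [DFSIn, DFSOut] interval encloses B's, so sweeping a congruence class in DFS order with a stack of still-open leaders finds each member's nearest dominating leader in a single pass. A deliberately simplified sketch of that interval logic (assumed precomputed intervals; not the pass's ValueDFS machinery, which also handles ranks and stores):

    #include <utility>
    #include <vector>

    struct ValueDFS {
      int DFSIn, DFSOut; // dominator-tree DFS interval of the defining block
      int Id;            // stand-in for the value itself
    };

    // Sweep members (already sorted by DFSIn) keeping a stack of open leaders;
    // the top of stack dominates a member iff its interval encloses it.
    std::vector<std::pair<int, int>>
    eliminate(const std::vector<ValueDFS> &Sorted) {
      std::vector<std::pair<int, int>> Replacements; // {member, leader}
      std::vector<ValueDFS> Stack;
      for (const ValueDFS &M : Sorted) {
        while (!Stack.empty() && M.DFSOut > Stack.back().DFSOut)
          Stack.pop_back(); // the leader's dominated region has ended
        if (!Stack.empty())
          Replacements.push_back({M.Id, Stack.back().Id});
        else
          Stack.push_back(M); // first available leader for this region
      }
      return Replacements;
    }

    int main() {
      // A=[1,10] dominates B=[3,6]; C=[12,15] lies outside A's region.
      auto R = eliminate({{1, 10, 100}, {3, 6, 101}, {12, 15, 102}});
      return R.size() == 1 && R[0] == std::make_pair(101, 100) ? 0 : 1;
    }
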
@@ -3341,11 +3707,12 @@
 // we will simplify an operation with all constants so that it doesn't matter
 // what order they appear in.
 unsigned int NewGVN::getRank(const Value *V) const {
-  // Prefer undef to anything else
+  // Prefer constants to undef to anything else
+  // Undef is a constant, have to check it first.
   if (isa<UndefValue>(V))
-    return 0;
-  if (isa<Constant>(V))
     return 1;
+  if (isa<Constant>(V))
+    return 0;
   else if (auto *A = dyn_cast<Argument>(V))
     return 2 + A->getArgNo();
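After this change, the canonical order getRank induces, lowest rank first, is: constants (0), undef (1), then arguments (2 + argument index), with all other values ranked higher by machinery not shown in this hunk; the pass uses it to put commutative operands in a canonical order so equivalent expressions hash the same. A toy mirror of just the policy visible above (hypothetical kinds; the fallback rank is an assumption for illustration):

    #include <cassert>

    enum class Kind { Constant, Undef, Argument, Other };

    unsigned toyRank(Kind K, unsigned ArgNo = 0) {
      switch (K) {
      case Kind::Constant: return 0; // most preferred canonical operand
      case Kind::Undef:    return 1; // undef is a Constant, so test it first
      case Kind::Argument: return 2 + ArgNo;
      case Kind::Other:    break;
      }
      return ~0u; // other values rank after arguments (details elided here)
    }

    int main() {
      // Canonicalization prefers the constant, then undef, then arguments.
      assert(toyRank(Kind::Constant) < toyRank(Kind::Undef));
      assert(toyRank(Kind::Undef) < toyRank(Kind::Argument, 0));
      assert(toyRank(Kind::Argument, 3) < toyRank(Kind::Other));
    }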
We need to be able to bitcast to integer. +- if (LoadTy->isStructTy() || LoadTy->isArrayTy()) +- return -1; +- +- int64_t StoreOffset = 0, LoadOffset = 0; +- Value *StoreBase = +- GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, *DL); +- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, *DL); +- if (StoreBase != LoadBase) +- return -1; +- +- // If the load and store don't overlap at all, the store doesn't provide +- // anything to the load. In this case, they really don't alias at all, AA +- // must have gotten confused. +- uint64_t LoadSize = DL->getTypeSizeInBits(LoadTy); +- +- if ((WriteSizeInBits & 7) | (LoadSize & 7)) +- return -1; +- uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. +- LoadSize >>= 3; +- +- bool isAAFailure = false; +- if (StoreOffset < LoadOffset) +- isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset; +- else +- isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset; +- +- if (isAAFailure) { +- return -1; +- } +- +- // If the Load isn't completely contained within the stored bits, we don't +- // have all the bits to feed it. We could do something crazy in the future +- // (issue a smaller load then merge the bits in) but this seems unlikely to be +- // valuable. +- if (StoreOffset > LoadOffset || +- StoreOffset + StoreSize < LoadOffset + LoadSize) +- return -1; +- +- // Okay, we can do this transformation. Return the number of bytes into the +- // store that the load is. +- return LoadOffset - StoreOffset; +-} +- +-/// This function is called when we have a +-/// memdep query of a load that ends up being a clobbering store. +-int NewGVN::analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, +- StoreInst *DepSI) { +- +- // Cannot handle reading from store of first-class aggregate yet. +- if (DepSI->getValueOperand()->getType()->isStructTy() || +- DepSI->getValueOperand()->getType()->isArrayTy()) +- return -1; +- +- Value *StorePtr = DepSI->getPointerOperand(); +- uint64_t StoreSize = +- DL->getTypeSizeInBits(DepSI->getValueOperand()->getType()); +- return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize); +-} +- +-/// This function is called when we have a +-/// memdep query of a load that ends up being clobbered by another load. See if +-/// the other load can feed into the second load. +-int NewGVN::analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, +- LoadInst *DepLI) { +- // Cannot handle reading from store of first-class aggregate yet. +- if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) +- return -1; +- +- Value *DepPtr = DepLI->getPointerOperand(); +- uint64_t DepSize = DL->getTypeSizeInBits(DepLI->getType()); +- int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize); +- +- if (Offset != -1) { +- // If the size is too large and we will have to widen, ensure we pass the +- // widening rules below +- unsigned SrcValSize = DL->getTypeStoreSize(DepLI->getType()); +- unsigned LoadSize = DL->getTypeStoreSize(LoadTy); +- if (Offset + LoadSize <= SrcValSize) +- return Offset; +- } +- +- // If we have a load/load clobber an DepLI can be widened to cover this load, +- // then we should widen it! 
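The analyzeLoadFromClobbering* family above all funnel into the same overlap arithmetic. As a rough standalone restatement (a hypothetical helper, plain integers in place of DataLayout queries, not part of the patch):

#include <cstdint>

// Returns the byte offset of the load within the stored bytes, or -1 when
// the store cannot supply every bit of the load. Offsets are relative to a
// shared base pointer, mirroring GetPointerBaseWithConstantOffset above.
static int64_t loadOffsetInStore(int64_t StoreOffset, uint64_t StoreSize,
                                 int64_t LoadOffset, uint64_t LoadSize) {
  // Disjoint ranges: AA reported a clobber but nothing actually overlaps.
  if (StoreOffset + (int64_t)StoreSize <= LoadOffset ||
      LoadOffset + (int64_t)LoadSize <= StoreOffset)
    return -1;
  // The load must be fully contained in the store to forward its bits.
  if (StoreOffset > LoadOffset ||
      StoreOffset + (int64_t)StoreSize < LoadOffset + (int64_t)LoadSize)
    return -1;
  return LoadOffset - StoreOffset;
}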
+-  // If we have a load/load clobber and DepLI can be widened to cover this
+-  // load, then we should widen it!
+-  int64_t LoadOffs = 0;
+-  const Value *LoadBase =
+-      GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, *DL);
+-  unsigned LoadSize = DL->getTypeStoreSize(LoadTy);
+-
+-  unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
+-      LoadBase, LoadOffs, LoadSize, DepLI);
+-  if (Size == 0)
+-    return -1;
+-
+-  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8);
+-}
+-
+-int NewGVN::analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
+-                                             MemIntrinsic *MI) {
+-  // If the mem operation is a non-constant size, we can't handle it.
+-  ConstantInt *SizeCst = dyn_cast(MI->getLength());
+-  if (!SizeCst)
+-    return -1;
+-  uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
+-
+-  // If this is memset, we just need to see if the offset is valid in the size
+-  // of the memset.
+-  if (MI->getIntrinsicID() == Intrinsic::memset)
+-    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+-                                          MemSizeInBits);
+-
+-  // If we have a memcpy/memmove, the only case we can handle is if this is a
+-  // copy from constant memory. In that case, we can read directly from the
+-  // constant memory.
+-  MemTransferInst *MTI = cast(MI);
+-
+-  Constant *Src = dyn_cast(MTI->getSource());
+-  if (!Src)
+-    return -1;
+-
+-  GlobalVariable *GV = dyn_cast(GetUnderlyingObject(Src, *DL));
+-  if (!GV || !GV->isConstant())
+-    return -1;
+-
+-  // See if the access is within the bounds of the transfer.
+-  int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+-                                              MemSizeInBits);
+-  if (Offset == -1)
+-    return Offset;
+-
+-  unsigned AS = Src->getType()->getPointerAddressSpace();
+-  // Otherwise, see if we can constant fold a load from the constant with the
+-  // offset applied as appropriate.
+-  Src =
+-      ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+-  Constant *OffsetCst =
+-      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+-  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+-                                       OffsetCst);
+-  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+-  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, *DL))
+-    return Offset;
+-  return -1;
+-}
+-
+ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
+                                                          const BasicBlock *B) {
+   StoreInst *SI = cast(I);
+-  const Expression *E =
+-      createStoreExpression(SI, MSSAWalker->getClobberingMemoryAccess(SI), B);
++  const Expression *E = createStoreExpression(SI, MSSA->getMemoryAccess(SI), B);
+   return E;
+ }
+
+-const Expression *
+-NewGVN::performSymbolicLoadCoercionFromPhi(Type *LoadType, Value *LoadPtr,
+-                                           LoadInst *LI, MemoryPhi *DefiningPhi,
+-                                           const BasicBlock *B) {
+-  if (!LoadPtr)
+-    return nullptr;
+-  AvailValInBlkVect ValuesPerBlock;
+-  for (auto &Op : DefiningPhi->operands()) {
+-    BasicBlock *IncomingBlock = DefiningPhi->getIncomingBlock(Op);
+-    // If the block is not reachable, it's undef
+-    if (!ReachableBlocks.count(IncomingBlock)) {
+-      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(IncomingBlock));
+-      continue;
+-    }
+-    // FIXME: We don't support phi over phi
+-    if (isa(Op))
+-      return nullptr;
+-    const MemoryDef *Def = cast(Op);
+-    Instruction *Inst = Def->getMemoryInst();
+-    if (!Inst)
+-      return nullptr;
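For the memcpy-from-constant-global case deleted above, the folding step amounts to reading the known initializer bytes directly. A minimal little-endian model (hypothetical helper with a fixed 32-bit load width; the real code constant-folds through a GEP instead):

#include <cstdint>
#include <cstring>

// Once the transfer source is known constant memory, a load at a valid
// Offset can be folded by copying the covered bytes out of the initializer.
static uint32_t foldLoadFromConstantBytes(const uint8_t *ConstSrc,
                                          unsigned Offset) {
  uint32_t V;
  std::memcpy(&V, ConstSrc + Offset, sizeof(V));
  return V;
}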
+-    // If the dependence is to a store that writes to a superset of the bits
+-    // read by the load, we can extract the bits we need for the load from the
+-    // stored value.
+-    if (StoreInst *DepSI = dyn_cast(Inst)) {
+-      int Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, DepSI);
+-      if (Offset != -1) {
+-        ValuesPerBlock.push_back(AvailableValueInBlock::get(
+-            Def->getBlock(), DepSI->getValueOperand(), Offset));
+-        continue;
+-      }
+-    }
+-    // Check to see if we have something like this:
+-    //   load i32* P
+-    //   load i8* (P+1)
+-    // if we have this, replace the latter with an extraction from the former.
+-    if (LoadInst *DepLI = dyn_cast(Inst)) {
+-      // If this is a clobber and L is the first instruction in its block, then
+-      // we have the first instruction in the entry block.
+-      if (DepLI != LI) {
+-        int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI);
+-
+-        if (Offset != -1) {
+-          ValuesPerBlock.push_back(
+-              AvailableValueInBlock::getLoad(Def->getBlock(), DepLI, Offset));
+-          continue;
+-        }
+-      }
+-    }
+-    // If the clobbering value is a memset/memcpy/memmove, see if we can
+-    // forward a value on from it.
+-    if (MemIntrinsic *DepMI = dyn_cast(Inst)) {
+-      int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI);
+-      if (Offset != -1) {
+-        Value *PossibleConstant =
+-            getMemInstValueForLoad(DepMI, Offset, LoadType, LI, true);
+-        ValuesPerBlock.push_back(
+-            AvailableValueInBlock::getMI(Def->getBlock(), DepMI, Offset));
+-        continue;
+-      }
+-    }
+-    return nullptr;
+-  }
+-  assert(ValuesPerBlock.size() == DefiningPhi->getNumIncomingValues());
+-  return nullptr;
+-}
+-
+-const Expression *NewGVN::performSymbolicLoadCoercion(
+-    Type *LoadType, Value *LoadPtr, LoadInst *LI, Instruction *DepInst,
+-    MemoryAccess *DefiningAccess, const BasicBlock *B) {
+-  assert((!LI || LI->isSimple()) && "Not a simple load");
+-
+-  if (StoreInst *DepSI = dyn_cast(DepInst)) {
+-    Value *LoadAddressLeader = lookupOperandLeader(LoadPtr, B).first;
+-    Value *StoreAddressLeader =
+-        lookupOperandLeader(DepSI->getPointerOperand(), B).first;
+-    Value *StoreVal = DepSI->getValueOperand();
+-    if (StoreVal->getType() == LoadType &&
+-        LoadAddressLeader == StoreAddressLeader) {
+-      return createVariableOrConstant(DepSI->getValueOperand(), B);
+-    } else {
+-      int Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, DepSI);
+-      if (Offset >= 0)
+-        return createCoercibleLoadExpression(
+-            LoadType, LoadPtr, LI, DefiningAccess, (unsigned)Offset, DepSI, B);
+-    }
+-  } else if (LoadInst *DepLI = dyn_cast(DepInst)) {
+-    int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI);
+-    if (Offset >= 0)
+-      return createCoercibleLoadExpression(
+-          LoadType, LoadPtr, LI, DefiningAccess, (unsigned)Offset, DepLI, B);
+-  } else if (MemIntrinsic *DepMI = dyn_cast(DepInst)) {
+-    int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI);
+-    if (Offset >= 0)
+-      return createCoercibleLoadExpression(
+-          LoadType, LoadPtr, LI, DefiningAccess, (unsigned)Offset, DepMI, B);
+-  }
+-  // If this load really doesn't depend on anything, then we must be loading
+-  // an undef value. This can happen when loading for a fresh allocation with
+-  // no intervening stores, for example.
+-  else if (isa(DepInst) || isMallocLikeFn(DepInst, TLI))
+-    return createConstantExpression(UndefValue::get(LoadType), false);
+-
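The load i32* P / load i8* (P+1) example in the comment above comes down to shift-and-truncate once both loads are known to read the same stored bytes. A little-endian sketch with hypothetical fixed widths (not part of the patch):

#include <cstdint>

// Re-derive the narrow load from the wider one: shift the byte at Offset
// down to bit 0, then truncate (little-endian layout assumed).
static uint8_t extractByteFromWideLoad(uint32_t WideValue, unsigned Offset) {
  return (uint8_t)(WideValue >> (8 * Offset));
}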
+-  // If this load occurs right after a lifetime begin,
+-  // then the loaded value is undefined.
+-  else if (IntrinsicInst *II = dyn_cast(DepInst)) {
+-    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+-      return createConstantExpression(UndefValue::get(LoadType), false);
+-  }
+-  // If this load follows a calloc (which zero initializes memory),
+-  // then the loaded value is zero
+-  else if (isCallocLikeFn(DepInst, TLI)) {
+-    return createConstantExpression(Constant::getNullValue(LoadType), false);
+-  }
+-
+-  return nullptr;
+-}
+-
+ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
+                                                         const BasicBlock *B) {
+   LoadInst *LI = cast(I);
+@@ -1238,10 +662,10 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
+     return nullptr;
+
+   Value *LoadAddressLeader =
+-      lookupOperandLeader(LI->getPointerOperand(), B).first;
++      lookupOperandLeader(LI->getPointerOperand(), I, B);
+   // Load of undef is undef
+   if (isa(LoadAddressLeader))
+-    return createConstantExpression(UndefValue::get(LI->getType()), false);
++    return createConstantExpression(UndefValue::get(LI->getType()));
+
+   MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+
+@@ -1251,58 +675,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
+     // If the defining instruction is not reachable, replace with
+     // undef
+     if (!ReachableBlocks.count(DefiningInst->getParent()))
+-      return createConstantExpression(UndefValue::get(LI->getType()), false);
+-    const Expression *CoercionResult =
+-        performSymbolicLoadCoercion(LI->getType(), LI->getPointerOperand(),
+-                                    LI, DefiningInst, DefiningAccess, B);
+-    if (CoercionResult)
+-      return CoercionResult;
+-  } else if (MemoryPhi *MP = dyn_cast(DefiningAccess)) {
+-    const Expression *CoercionResult = performSymbolicLoadCoercionFromPhi(
+-        LI->getType(), LI->getPointerOperand(), LI, MP, B);
+-    if (CoercionResult)
+-      return CoercionResult;
+-  }
+-} else {
+-  BasicBlock *LoadBlock = LI->getParent();
+-  MemoryAccess *LoadAccess = MSSA->getMemoryAccess(LI);
+-  // Okay, so uh, we couldn't use the defining access to grab a value out
+-  // of it. See if we can reuse any of its uses by widening a load.
+-  for (const auto &U : DefiningAccess->users()) {
+-    MemoryAccess *MA = cast(U);
+-    if (MA == LoadAccess)
+-      continue;
+-    if (isa(MA))
+-      continue;
+-    Instruction *DefiningInst = cast(MA)->getMemoryInst();
+-    if (LoadInst *DepLI = dyn_cast(DefiningInst)) {
+-      BasicBlock *DefiningBlock = DefiningInst->getParent();
+-      if (!DT->dominates(DefiningBlock, LoadBlock))
+-        continue;
+-
+-      // Make sure the dependent load comes before the load we are trying
+-      // to coerce if they are in the same block
+-      if (InstrDFS[DepLI] >= InstrDFS[LI])
+-        continue;
+-
+-      // Now, first make sure they really aren't identical loads, and if
+-      // they aren't see if the dominating one can be coerced.
+-      // We don't want to mark identical loads coercible, since coercible
+-      // loads don't value number with normal loads.
+-      Value *DepAddressLeader =
+-          lookupOperandLeader(DepLI->getPointerOperand(), B).first;
+-
+-      if (LI->getType() != DepLI->getType() ||
+-          DepAddressLeader != LoadAddressLeader ||
+-          LI->isSimple() != DepLI->isSimple()) {
+-        int Offset = analyzeLoadFromClobberingLoad(
+-            LI->getType(), LI->getPointerOperand(), DepLI);
+-        if (Offset >= 0)
+-          return createCoercibleLoadExpression(
+-              LI->getType(), LI->getPointerOperand(), LI, DefiningAccess,
+-              (unsigned)Offset, DepLI, B);
+-      }
+-    }
++      return createConstantExpression(UndefValue::get(LI->getType()));
+     }
+   }
+
+@@ -1335,7 +708,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
+                  << "\n");
+     E->deallocateOperands(ArgRecycler);
+     ExpressionAllocator.Deallocate(E);
+-    return createConstantExpression(UndefValue::get(I->getType()), false);
++    return createConstantExpression(UndefValue::get(I->getType()));
+   }
+
+   Value *AllSameValue = E->getOperand(0);
+@@ -1369,8 +742,8 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
+     E->deallocateOperands(ArgRecycler);
+     ExpressionAllocator.Deallocate(E);
+     if (Constant *C = dyn_cast(AllSameValue))
+-      return createConstantExpression(C, E->usedEquivalence());
+-    return createVariableExpression(AllSameValue, E->usedEquivalence());
++      return createConstantExpression(C);
++    return createVariableExpression(AllSameValue);
+   }
+   return E;
+ }
+@@ -1421,12 +794,11 @@ NewGVN::performSymbolicAggrValueEvaluation(Instruction *I,
+ /// before value numbering
+ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
+                                                     const BasicBlock *B) {
+-  // XXX: Use forward propagation by substituting DefiningExpr
+   const Expression *E = NULL;
+   if (Constant *C = dyn_cast(V))
+-    E = createConstantExpression(C, false);
++    E = createConstantExpression(C);
+   else if (isa(V) || isa(V)) {
+-    E = createVariableExpression(V, false);
++    E = createVariableExpression(V);
+   } else {
+     // TODO: memory intrinsics
+     // TODO: Some day, we should do the forward propagation and reassociation
+@@ -1509,14 +881,7 @@ bool NewGVN::isOnlyReachableViaThisEdge(const BasicBlockEdge &E) {
+   // only reachable from Src, in practice it is pointless since at the time
+   // GVN runs all such loops have preheaders, which means that Dst will have
+   // been changed to have only one predecessor, namely Src.
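The single-predecessor test kept below can be stated over any predecessor map. A small sketch with a hypothetical adjacency structure (not LLVM types):

#include <map>
#include <vector>

// The edge Src -> Dst is the only way into Dst exactly when Dst has a
// single predecessor and that predecessor is Src.
static bool isOnlyReachableViaEdge(
    const std::map<int, std::vector<int>> &Preds, int Src, int Dst) {
  auto It = Preds.find(Dst);
  return It != Preds.end() && It->second.size() == 1 &&
         It->second.front() == Src;
}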
+-#if 0
+-  BasicBlock *EdgeEnd = const_cast(E.getEnd());
+-  if (PredCache.size(EdgeEnd) != 1)
+-    return false;
+-  const BasicBlock *Pred = PredCache.get(EdgeEnd)[0];
+-#else
+   const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
+-#endif
+   const BasicBlock *Src = E.getStart();
+   assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+   (void)Src;
+@@ -1600,31 +965,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
+   DEBUG(dbgs() << "New congruence class for " << V << " is " << EClass->ID
+                << "\n");
+
+-  if (E && isa(E)) {
+-    const CoercibleLoadExpression *L = cast(E);
+-    VClass->CoercibleMembers.erase(V);
+-    EClass->CoercibleMembers.insert(V);
+-    CoercionInfo.insert(
+-        std::make_pair(V, std::make_pair(L->getOffset(), L->getSrc())));
+-  } else {
+-    VClass->Members.erase(V);
+-    EClass->Members.insert(V);
+-  }
+-
++  VClass->Members.erase(V);
++  EClass->Members.insert(V);
+   ValueToClass[V] = EClass;
+   // See if we destroyed the class or need to swap leaders
+-  if ((VClass->Members.empty() && VClass->CoercibleMembers.empty()) &&
+-      VClass != InitialClass) {
++  if (VClass->Members.empty() && VClass != InitialClass) {
+     if (VClass->DefiningExpr) {
+       VClass->Dead = true;
+-
+       DEBUG(dbgs() << "Erasing expression " << *E << " from table\n");
+-      // bool wasE = *E == *VClass->expression;
+       ExpressionToClass.erase(VClass->DefiningExpr);
+-      // if (wasE)
+-      //   lookupMap->insert({E, EClass});
+     }
+-    // delete VClass;
+   } else if (VClass->RepLeader == V) {
+     /// XXX: Check this. When the leader changes, the value numbering of
+     /// everything may change, so we need to reprocess.
+@@ -1634,11 +984,6 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
+       TouchedInstructions.set(InstrDFS[I]);
+       ChangedValues.insert(M);
+     }
+-    for (auto EM : VClass->CoercibleMembers) {
+-      if (Instruction *I = dyn_cast(EM))
+-        TouchedInstructions.set(InstrDFS[I]);
+-      ChangedValues.insert(EM);
+-    }
+     }
+   }
+   markUsersTouched(V);
+@@ -1653,12 +998,10 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
+   if (ReachableEdges.insert({From, To}).second) {
+     // If this block wasn't reachable before, all instructions are touched
+     if (ReachableBlocks.insert(To).second) {
+-      SingleReachableIncoming.insert({To, From});
+       DEBUG(dbgs() << "Block " << getBlockName(To) << " marked reachable\n");
+       const auto &InstRange = BlockInstRange.lookup(To);
+       TouchedInstructions.set(InstRange.first, InstRange.second);
+     } else {
+-      SingleReachableIncoming.erase(To);
+       DEBUG(dbgs() << "Block " << getBlockName(To)
+                    << " was reachable, but new edge {" << getBlockName(From)
+                    << "," << getBlockName(To) << "} to it found\n");
+@@ -1672,8 +1015,6 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
+         TouchedInstructions.set(InstrDFS[&*BI]);
+         ++BI;
+       }
+-      // Propagate the change downstream.
+-      propagateChangeInEdge(To);
+     }
+   }
+ }
+@@ -1682,43 +1023,11 @@
+ // switch, cmp, or whatever) and a block, see if we know some constant
+ // value for it already
+ Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const {
+-  auto Result = lookupOperandLeader(Cond, B);
+-  if (isa(Result.first))
+-    return Result.first;
+-
++  auto Result = lookupOperandLeader(Cond, nullptr, B);
++  if (isa(Result))
++    return Result;
+   return nullptr;
+ }
+-Value *NewGVN::inferValueAtBlock(Value *V, const BasicBlock *B) const {
+-  V = lookupOperandLeader(V, B).first;
+-  if (isa(V))
+-    return V;
+-  const BasicBlock *FirstBlock = B;
+-  const BasicBlock *LastBlock = nullptr;
+-  while (!isa(V) && B != LastBlock) {
+-    B = FirstBlock;
+-    while (B != LastBlock) {
+-      const BasicBlock *SingleIncomingBlock = SingleReachableIncoming.lookup(B);
+-      if (!SingleIncomingBlock) {
+-        const auto DomNode = DT->getNode(B)->getIDom();
+-        if (!DomNode) {
+-          B = LastBlock;
+-          break;
+-        }
+-
+-        B = DomNode->getBlock();
+-        continue;
+-      }
+-      const auto EPI = EdgePredicates.find({SingleIncomingBlock, B});
+-      if (EPI != EdgePredicates.end()) {
+-        for (const auto &Predicate : EPI->second)
+-          if (Predicate.First == V || Predicate.Second == V)
+-            DEBUG(dbgs() << "Edge Predicate found for value " << *V << " \n");
+-      }
+-      B = SingleIncomingBlock;
+-    }
+-  }
+-  return V;
+-}
+
+ // processOutgoingEdges - Process the outgoing edges of a block for
+ // reachability.
+@@ -1738,37 +1047,10 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+     } else if (isa(Cond)) {
+       CondEvaluated = Cond;
+     }
+-  } else {
+-    InvolvedInEquivalence.set(InstrDFS[TI]);
+   }
+   ConstantInt *CI;
+   BasicBlock *TrueSucc = BR->getSuccessor(0);
+   BasicBlock *FalseSucc = BR->getSuccessor(1);
+-  // Start with the true predicate
+-  if (CmpInst *CI = dyn_cast(Cond)) {
+-    CmpInst::Predicate Predicate = CI->getPredicate();
+-    Value *Op0 = CI->getOperand(0);
+-    Value *Op1 = CI->getOperand(1);
+-    EdgePredicate EP = {Op0, Predicate, Op1,
+-                        ConstantInt::getTrue(B->getContext())};
+-    // Make a consistent order for predicates
+-    if (Op0 > Op1) {
+-      std::swap(EP.First, EP.Second);
+-      EP.Op = CmpInst::getSwappedPredicate(EP.Op);
+-    }
+-
+-    // XXX: Infer value of predicate
+-    // XXX: Recompute predicates properly
+-    const auto EPT = EdgePredicates[{B, TrueSucc}].insert(EP);
+-    if (!EPT.second)
+-      propagateChangeInEdge(TrueSucc);
+-
+-    EP.Val = ConstantInt::getFalse(B->getContext());
+-    const auto EPF = EdgePredicates[{B, FalseSucc}].insert(EP);
+-    if (!EPF.second)
+-      propagateChangeInEdge(FalseSucc);
+-  }
+-
+   if (CondEvaluated && (CI = dyn_cast(CondEvaluated))) {
+     if (CI->isOne()) {
+       DEBUG(dbgs() << "Condition for Terminator " << *TI
+@@ -1780,13 +1062,6 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+       updateReachableEdge(B, FalseSucc);
+     }
+   } else {
+-    /* XXX:Predicates
+-    propagateEquality(Cond, ConstantInt::getTrue(TrueSucc->getContext()),
+-                      false, {B, TrueSucc});
+-
+-    propagateEquality(Cond, ConstantInt::getFalse(FalseSucc->getContext()),
+-                      false, {B, FalseSucc});
+-    */
+     updateReachableEdge(B, TrueSucc);
+     updateReachableEdge(B, FalseSucc);
+   }
+@@ -1802,7 +1077,6 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+   Value *CondEvaluated = findConditionEquivalence(SwitchCond, B);
+   // See if we were able to turn this switch statement into a constant
+   if (CondEvaluated && isa(CondEvaluated)) {
+-    InvolvedInEquivalence.set(InstrDFS[TI]);
+     ConstantInt *CondVal = cast(CondEvaluated);
+     // We should be able to get case value for this
+     auto CaseVal = SI->findCaseValue(CondVal);
+@@ -1827,27 +1101,12 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+       if (Block == TargetBlock)
+         MultipleEdgesOneReachable = true;
+     }
+-    const BasicBlockEdge E(B, TargetBlock);
+-    /* XXX:Predicates
+-    propagateEquality(SwitchCond, CaseVal.getCaseValue(),
+-                      MultipleEdgesOneReachable, E);*/
+   } else {
+     for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+       BasicBlock *TargetBlock = SI->getSuccessor(i);
+       ++SwitchEdges[TargetBlock];
+       updateReachableEdge(B, TargetBlock);
+     }
+-    /*XXX:Predicates
+-    // Regardless of answers, propagate equalities for case values
+-    for (auto i = SI->case_begin(), e = SI->case_end(); i != e; ++i) {
+-      BasicBlock *TargetBlock = i.getCaseSuccessor();
+-      if (SwitchEdges.lookup(TargetBlock) == 1) {
+-        const BasicBlockEdge E(B, TargetBlock);
+-        propagateEquality(SwitchCond, i.getCaseValue(),
+-                          MultipleEdgesOneReachable, E);
+-      }
+-    }
+-    */
+   }
+ } else {
+   // Otherwise this is either unconditional, or a type we have no
+@@ -1859,98 +1118,17 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+   }
+ }
+
+-// Figure out and cache the dominated instruction range for this block. We do
+-// this by doing a depth first search, and figuring out the min and the max of
+-// the dominated instruction range.
+-// On the way up, we cache the ranges for all the child blocks.
+-// This is essentially an incremental DFS
+-
+-const std::pair
+-NewGVN::calculateDominatedInstRange(const DomTreeNode *DTN) {
+-
+-  // First see if we have it, if not, we need to recalculate
+-  const auto &DominatedRange = DominatedInstRange.find(DTN);
+-  if (DominatedRange != DominatedInstRange.end()) {
+-    return DominatedRange->second;
+-  }
+-  // Recalculate
+-  SmallVector, 32>
+-      WorkStack;
+-  WorkStack.emplace_back(DTN, DTN->begin());
+-  unsigned MaxSeen = 0;
+-  while (!WorkStack.empty()) {
+-    const auto &Back = WorkStack.back();
+-    const DomTreeNode *Node = Back.first;
+-    auto ChildIt = Back.second;
+-    const auto &Result = BlockInstRange.lookup(Node->getBlock());
+-    MaxSeen = std::max(MaxSeen, Result.second);
+-    // If we visited all of the children of this node, "recurse" back up the
+-    // stack setting the ranges
+-    if (ChildIt == Node->end()) {
+-      const auto &Result = BlockInstRange.lookup(Node->getBlock());
+-      DominatedInstRange[DTN] = {Result.first, MaxSeen};
+-      WorkStack.pop_back();
+-      if (WorkStack.empty())
+-        return {Result.first, MaxSeen};
+-    } else {
+-      while (ChildIt != Node->end()) {
+-        // Otherwise, recursively visit this child.
+-        const DomTreeNode *Child = *ChildIt;
+-        ++WorkStack.back().second;
+-        const auto &LookupResult = DominatedInstRange.find(Child);
+-        if (LookupResult == DominatedInstRange.end()) {
+-          WorkStack.emplace_back(Child, Child->begin());
+-          break;
+-        } else {
+-          // We already have calculated this subtree
+-          MaxSeen = std::max(MaxSeen, LookupResult->second.second);
+-          ++ChildIt;
+-        }
+-      }
+-    }
+-  }
+-  llvm_unreachable("Should have returned a value already");
+-}
+-
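The deleted calculateDominatedInstRange is, in essence, a memoized subtree-maximum query over the dominator tree. A compact recursive model (hypothetical Node type; the original uses an explicit worklist to bound stack depth):

#include <algorithm>
#include <unordered_map>
#include <vector>

struct Node {
  unsigned RangeEnd;            // end of this block's instruction range
  std::vector<Node *> Children; // dominator-tree children
};

// Maximum RangeEnd over N's subtree, cached so repeated queries are cheap.
static unsigned subtreeMax(Node *N,
                           std::unordered_map<Node *, unsigned> &Cache) {
  auto It = Cache.find(N);
  if (It != Cache.end())
    return It->second;
  unsigned Max = N->RangeEnd;
  for (Node *C : N->Children)
    Max = std::max(Max, subtreeMax(C, Cache));
  Cache[N] = Max;
  return Max;
}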
+-/// propagateChangeInEdge - Propagate a change in edge reachability
+-// When we discover a new edge to an existing reachable block, that
+-// can affect the value of instructions that used equivalences.
+-//
+-void NewGVN::propagateChangeInEdge(BasicBlock *Dest) {
+-  // Unlike the published algorithm, we don't touch blocks because we aren't
+-  // recomputing predicates.
+-  // Instead, we touch all the instructions we dominate that were used in an
+-  // equivalence, in case they changed.
+-  // We incrementally compute the dominated instruction ranges, because doing it
+-  // up front requires a DFS walk of the dominator tree that is a complete waste
+-  // of time if no equivalences ever get seen.
+-  // Note that we expect phi nodes in the Dest block will have already been
+-  // touched by our caller.
+-  DomTreeNode *DTN = DT->getNode(Dest);
+-
+-  // Note that this is an overshoot, because the inst ranges are calculated in
+-  // RPO order, not dominator tree order.
+-  const std::pair Result = calculateDominatedInstRange(DTN);
+-
+-  // Touch all the downstream dominated instructions that used equivalences.
+-  for (int InstrNum = InvolvedInEquivalence.find_next(Result.first - 1);
+-       InstrNum != -1 && (Result.second - InstrNum > 0);
+-       InstrNum = InvolvedInEquivalence.find_next(InstrNum)) {
+-    // TODO: We could do confluence block checks here.
+-    TouchedInstructions.set(InstrNum);
+-  }
+-}
+-
++// The algorithm initially places the values of the routine in the INITIAL
++// congruence class. The leader of INITIAL is the undetermined value `TOP`.
++// When the algorithm has finished, values still in INITIAL are unreachable.
+ void NewGVN::initializeCongruenceClasses(Function &F) {
+   // FIXME: Now I can't remember why this is 2.
+   NextCongruenceNum = 2;
+   // Initialize all other instructions to be in INITIAL class
+   CongruenceClass::MemberSet InitialValues;
+-  for (auto &B : F) {
+-    for (auto &I : B) {
++  for (auto &B : F)
++    for (auto &I : B)
+       InitialValues.insert(&I);
+-    }
+-  }
+
+   InitialClass = createCongruenceClass(NULL, NULL);
+   for (auto L : InitialValues)
+@@ -1964,22 +1142,21 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
+ }
+
+ void NewGVN::cleanupTables() {
+-
+-  ValueToClass.clear();
++#ifndef NDEBUG
+   for (unsigned i = 0, e = CongruenceClasses.size(); i != e; ++i) {
+     DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->ID << " has "
+                  << CongruenceClasses[i]->Members.size() << " members\n");
+   }
++#endif
+
++  ValueToClass.clear();
+   ArgRecycler.clear(ExpressionAllocator);
+   ExpressionAllocator.Reset();
+   CongruenceClasses.clear();
+   ExpressionToClass.clear();
+   ValueToExpression.clear();
+-  SingleReachableIncoming.clear();
+   ReachableBlocks.clear();
+   ReachableEdges.clear();
+-  EdgePredicates.clear();
+   ProcessedCount.clear();
+   DFSDomMap.clear();
+   InstrDFS.clear();
+@@ -1988,11 +1165,7 @@ void NewGVN::cleanupTables() {
+   DFSToInstr.clear();
+   BlockInstRange.clear();
+   TouchedInstructions.clear();
+-  InvolvedInEquivalence.clear();
+-  CoercionInfo.clear();
+-  CoercionForwarding.clear();
+   DominatedInstRange.clear();
+-  PredCache.clear();
+ }
+
+ std::pair NewGVN::assignDFSNumbers(BasicBlock *B,
+@@ -2009,30 +1182,23 @@ std::pair NewGVN::assignDFSNumbers(BasicBlock *B,
+   return std::make_pair(Start, End);
+ }
+
+-void NewGVN::topoVisitCongruenceClass(
+-    CongruenceClass *CC, SmallDenseMap &UsedCount,
+-    SmallPtrSetImpl &Visited) {
+-  Visited.insert(CC);
+-  // Visit the classes of the values of the operands of the leader set
+-  for (auto Member : CC->Members) {
+-    if (User *U = dyn_cast(Member)) {
+-      for (auto &I : U->operands()) {
+-        CongruenceClass *OperandCC = ValueToClass.lookup(I);
+-        if (OperandCC) {
+-          UsedCount[OperandCC] += 1;
+-          if (!Visited.count(OperandCC))
+-            topoVisitCongruenceClass(OperandCC, UsedCount, Visited);
+-        }
+-      }
+-    }
++void NewGVN::updateProcessedCount(Value *V) {
++#ifndef NDEBUG
++  if (ProcessedCount.count(V) == 0) {
++    ProcessedCount.insert({V, 1});
++  } else {
++    ProcessedCount[V] += 1;
++    assert(ProcessedCount[V] < 100 &&
++           "Seem to have processed the same Value a lot\n");
+   }
++#endif
+ }
+
+ /// runOnFunction - This is the main transformation entry point for a
+ /// function.
+ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+-                    TargetLibraryInfo *TLI, AliasAnalysis *AA,
+-                    MemorySSA *MSSA) {
++                    TargetLibraryInfo *TLI, AliasAnalysis *AA,
++                    MemorySSA *MSSA) {
+   bool Changed = false;
+   this->DT = DT;
+   this->AC = AC;
+@@ -2042,12 +1208,9 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+   DL = &F.getParent()->getDataLayout();
+   MSSAWalker = MSSA->getWalker();
+
+-  // SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(AA, DT));
+-
+-  unsigned ICount = 0;
+   // Count number of instructions for sizing of hash tables, and come
+-  // up with a global dfs numbering for instructions
+-
++  // up with a global dfs numbering for instructions.
++  unsigned ICount = 0;
+   SmallPtrSet VisitedBlocks;
+
+   // Note: We want RPO traversal of the blocks, which is not quite the same as
+@@ -2056,7 +1219,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+   // If we visit in the wrong order, we will end up performing N times as many
+   // iterations.
+   ReversePostOrderTraversal RPOT(&F);
+-  for (BasicBlock *B : RPOT) {
++  for (auto &B : RPOT) {
+     VisitedBlocks.insert(B);
+     const auto &BlockRange = assignDFSNumbers(B, ICount);
+     BlockInstRange.insert({B, BlockRange});
+@@ -2066,7 +1229,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+   // Handle forward unreachable blocks and figure out which blocks
+   // have single preds
+
+-  for (BasicBlock &B : F) {
++  for (auto &B : F) {
+     // Assign numbers to unreachable blocks
+     if (!VisitedBlocks.count(&B)) {
+       const auto &BlockRange = assignDFSNumbers(&B, ICount);
+@@ -2076,7 +1239,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+   }
+
+   TouchedInstructions.resize(ICount + 1);
+-  InvolvedInEquivalence.resize(ICount + 1);
+   DominatedInstRange.reserve(F.size());
+   // Ensure we don't end up resizing the expressionToClass map, as
+   // that can be quite expensive. At most, we have one expression per
+@@ -2112,17 +1274,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+                      << " because it is unreachable\n");
+         continue;
+       }
+-      //#ifndef NDEBUG
+-      if (ProcessedCount.count(CurrBlock) == 0) {
+-        ProcessedCount.insert({CurrBlock, 1});
+-      } else {
+-        ProcessedCount[CurrBlock] += 1;
+-        assert(ProcessedCount[CurrBlock] < 100 &&
+-               "Seem to have processed the same block a lot\n");
+-        if (ProcessedCount[CurrBlock] >= 100)
+-          report_fatal_error("Processed block too many times");
+-      }
+-      //#endif
++      updateProcessedCount(CurrBlock);
+     }
+     DEBUG(dbgs() << "Processing instruction " << *I << "\n");
+     if (I->use_empty() && !I->getType()->isVoidTy()) {
+@@ -2132,23 +1284,12 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+       TouchedInstructions.reset(InstrNum);
+       continue;
+     }
++    updateProcessedCount(I);
+
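The new updateProcessedCount helper factors out the inline NDEBUG counting blocks this hunk deletes. The shape of that guard, as a generic sketch (hypothetical name, standard containers in place of LLVM's):

#include <cassert>
#include <unordered_map>

// Debug-only convergence check: count how often the fixpoint loop revisits
// the same item and assert if it looks like iteration stopped converging.
template <typename T>
void noteProcessed(std::unordered_map<T, unsigned> &Counts, const T &V) {
#ifndef NDEBUG
  ++Counts[V];
  assert(Counts[V] < 100 && "Seem to have processed the same value a lot");
#else
  (void)Counts;
  (void)V;
#endif
}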
+-// This is done in case something eliminates the instruction
+-// along the way.
+-
+-#ifndef NDEBUG
+-    if (ProcessedCount.count(I) == 0) {
+-      ProcessedCount.insert({I, 1});
+-    } else {
+-      ProcessedCount[I] += 1;
+-      assert(ProcessedCount[I] < 100 &&
+-             "Seem to have processed the same instruction a lot");
+-    }
+-#endif
++    // This is done in case something eliminates the instruction
++    // along the way.
+     if (!I->isTerminator()) {
+       const Expression *Symbolized = performSymbolicEvaluation(I, CurrBlock);
+-      if (Symbolized && Symbolized->usedEquivalence())
+-        InvolvedInEquivalence.set(InstrDFS[I]);
+       performCongruenceFinding(I, Symbolized);
+     } else {
+       processOutgoingEdges(dyn_cast(I), CurrBlock);
+@@ -2161,53 +1302,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+
+   Changed |= eliminateInstructions(F);
+
+-// The ideal ordering for processing is not quite topological ordering,
+-// because there are multiple roots. It is essentially "group every vertex
+-// that depends on a given vertex together after that vertex", which is not
+-// the same. It is, in fact, an NP-complete problem, and given that the graph
+-// may be cyclic anyway, we order the congruence classes by how many things
+-// depend on them. This is a good approximation, and will cut down the number
+-// of iterations.
+-#if 0
+-  SmallDenseMap UsedCount;
+-  SmallPtrSet VisitedClasses;
+-
+-  for (int i = CongruenceClasses.size() - 1; i >= 0; --i) {
+-    CongruenceClass *CC = CongruenceClasses[i];
+-    if (CC == InitialClass || CC->dead || VisitedClasses.count(CC))
+-      continue;
+-    topoVisitCongruenceClass(CC, UsedCount, VisitedClasses);
+-  }
+-  SmallVector Worklist;
+-  for (auto &CC : CongruenceClasses)
+-    Worklist.push_back(CC);
+-  // std::sort(Worklist.begin(), Worklist.end(),
+-  //           [&UsedCount](CongruenceClass *&A, CongruenceClass *&B) {
+-  //             return UsedCount[A] > UsedCount[B];
+-  //           });
+-
+-  bool PREChanged = true;
+-  while (PREChanged) {
+-    PREChanged = false;
+-#if 1
+-    // FIXME: Handle added congruence classes
+-    if (Worklist.size() != CongruenceClasses.size())
+-      Worklist.insert(Worklist.end(),
+-                      CongruenceClasses.begin() + (Worklist.size() - 1),
+-                      CongruenceClasses.end());
+-#endif
+-    for (auto CC : Worklist) {
+-      if (CC == InitialClass || CC->dead)
+-        continue;
+-
+-      PREChanged |= performPREOnClass(CC);
+-    }
+-  }
+-
+-  PREValueForwarding.clear();
+-
+-  Changed |= PREChanged;
+-#endif
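The disabled block above orders congruence classes by how many other classes use them, since an exact definitions-before-uses ordering is NP-complete on a cyclic graph. A sketch of that heuristic (hypothetical helper over plain ints, not part of the patch); the deletion loop then follows below:

#include <algorithm>
#include <unordered_map>
#include <vector>

// Approximate "process definitions before uses" on a possibly-cyclic
// dependence graph by visiting the most-depended-upon classes first.
static std::vector<int>
orderByUseCount(const std::unordered_map<int, unsigned> &UseCount,
                std::vector<int> Classes) {
  std::stable_sort(Classes.begin(), Classes.end(), [&](int A, int B) {
    unsigned CA = UseCount.count(A) ? UseCount.at(A) : 0;
    unsigned CB = UseCount.count(B) ? UseCount.at(B) : 0;
    return CA > CB; // most-used first
  });
  return Classes;
}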
+   // Delete all instructions marked for deletion.
+   for (Instruction *ToErase : InstructionsToErase) {
+     if (!ToErase->use_empty())
+@@ -2241,7 +1335,8 @@ bool NewGVN::runOnFunction(Function &F) {
+       &getAnalysis().getMSSA());
+ }
+
+-PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager &AM) {
++PreservedAnalyses NewGVNPass::run(Function &F,
++                                  AnalysisManager &AM) {
+   NewGVN Impl;
+
+   // Apparently the order in which we get these results matter for
+@@ -2280,13 +1375,11 @@ struct NewGVN::ValueDFS {
+   int DFSIn;
+   int DFSOut;
+   int LocalNum;
+-  bool Coercible;
+   // Only one of these will be set
+   Value *Val;
+   Use *U;
+   ValueDFS()
+-      : DFSIn(0), DFSOut(0), LocalNum(0), Coercible(false), Val(nullptr),
+-        U(nullptr) {}
++      : DFSIn(0), DFSOut(0), LocalNum(0), Val(nullptr), U(nullptr) {}
+
+   bool operator<(const ValueDFS &other) const {
+     // It's not enough that any given field be less than - we have sets
+@@ -2309,8 +1402,6 @@ struct NewGVN::ValueDFS {
+     if (LocalNum < other.LocalNum)
+       return true;
+     else if (LocalNum == other.LocalNum) {
+-      if (!!Coercible < !!other.Coercible)
+-        return true;
+       if (Val < other.Val)
+         return true;
+       if (U < other.U)
+         return true;
+@@ -2323,8 +1414,7 @@ struct NewGVN::ValueDFS {
+ };
+
+ void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
+-                                      std::vector &DFSOrderedSet,
+-                                      bool Coercible) {
++                                      std::vector &DFSOrderedSet) {
+   for (auto D : Dense) {
+     // First add the value
+     BasicBlock *BB = getBlockForValue(D);
+@@ -2338,7 +1428,6 @@ void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
+     VD.DFSIn = DFSPair.first;
+     VD.DFSOut = DFSPair.second;
+     VD.Val = D;
+-    VD.Coercible = Coercible;
+     // If it's an instruction, use the real local dfs number,
+     if (Instruction *I = dyn_cast(D))
+       VD.LocalNum = InstrDFS[I];
+@@ -2351,7 +1440,6 @@ void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
+     for (auto &U : D->uses()) {
+       if (Instruction *I = dyn_cast(U.getUser())) {
+         ValueDFS VD;
+-        VD.Coercible = Coercible;
+         // Put the phi node uses in the incoming block
+         BasicBlock *IBlock;
+         if (PHINode *P = dyn_cast(I)) {
+@@ -2426,30 +1514,11 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
+     if (!Inst.use_empty())
+       Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
+     if (isa(Inst))
+-        continue;
++      continue;
+
+     Inst.eraseFromParent();
+     ++NumGVNInstrDeleted;
+   }
+-#if 0
+-  // Delete the instructions backwards, as it has a reduced likelihood of having
+-  // to update as many def-use and use-def chains.
+-  Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+-  while (EndInst->getIterator() != BB->begin()) {
+-    // Delete the next to last instruction.
+-    BasicBlock::iterator I = EndInst->getIterator();
+-    Instruction *Inst = &*--I;
+-    if (!Inst->use_empty())
+-      Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+-    if (isa(Inst)) {
+-      EndInst = Inst;
+-      continue;
+-    }
+-    Inst->eraseFromParent();
+-    BB->getInstList().erase(Inst);
+-    ++NumGVNInstrDeleted;
+-  }
+-#endif
+ }
+
+ void NewGVN::markInstructionForDeletion(Instruction *I) {
+@@ -2504,372 +1573,6 @@ private:
+ };
+ }
+
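ValueDFS::operator< above hand-rolls a lexicographic comparison. The intended ordering is what std::tie expresses directly; a sketch with the pointer fields reduced to opaque keys (hypothetical type, not part of the patch):

#include <tuple>

struct ValueDFSKey {
  int DFSIn, DFSOut, LocalNum;
  const void *Val, *U;
  // Lexicographic: DFSIn, then DFSOut, then LocalNum, then the pointers.
  bool operator<(const ValueDFSKey &O) const {
    return std::tie(DFSIn, DFSOut, LocalNum, Val, U) <
           std::tie(O.DFSIn, O.DFSOut, O.LocalNum, O.Val, O.U);
  }
};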
+-/// CanCoerceMustAliasedValueToLoad - Return true if
+-/// CoerceAvailableValueToLoadType will succeed.
+-bool NewGVN::canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy) {
+-
+-  // If the loaded or stored value is a first class array or struct, don't
+-  // try to transform them. We need to be able to bitcast to integer.
+-  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+-      StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+-    return false;
+-
+-  // The store has to be at least as big as the load.
+-  if (DL->getTypeSizeInBits(StoredVal->getType()) <
+-      DL->getTypeSizeInBits(LoadTy))
+-    return false;
+-
+-  return true;
+-}
+-
+-/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory,
+-/// and then a load from a must-aliased pointer of a different type, try to
+-/// coerce the stored value. LoadedTy is the type of the load we want to
+-/// replace and InsertPt is the place to insert new instructions.
+-///
+-/// If we can't do it, return null.
+-Value *NewGVN::coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+-                                              Instruction *InsertPt) {
+-
+-  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy))
+-    return 0;
+-
+-  // If this is already the right type, just return it.
+-  Type *StoredValTy = StoredVal->getType();
+-
+-  uint64_t StoreSize = DL->getTypeSizeInBits(StoredValTy);
+-  uint64_t LoadSize = DL->getTypeSizeInBits(LoadedTy);
+-
+-  // If the store and reload are the same size, we can always reuse it.
+-  if (StoreSize == LoadSize) {
+-    // Pointer to Pointer -> use bitcast.
+-    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
+-      Instruction *I = new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
+-      handleNewInstruction(I);
+-      return I;
+-    }
+-
+-    // Convert source pointers to integers, which can be bitcast.
+-    if (StoredValTy->isPointerTy()) {
+-      StoredValTy = DL->getIntPtrType(StoredValTy->getContext());
+-      Instruction *I = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+-      StoredVal = I;
+-      handleNewInstruction(I);
+-    }
+-
+-    Type *TypeToCastTo = LoadedTy;
+-    if (TypeToCastTo->isPointerTy())
+-      TypeToCastTo = DL->getIntPtrType(StoredValTy->getContext());
+-
+-    if (StoredValTy != TypeToCastTo) {
+-      Instruction *I = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
+-      StoredVal = I;
+-      handleNewInstruction(I);
+-    }
+-
+-    // Cast to pointer if the load needs a pointer type.
+-    if (LoadedTy->isPointerTy()) {
+-      Instruction *I = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
+-      StoredVal = I;
+-      handleNewInstruction(I);
+-    }
+-    return StoredVal;
+-  }
+-
+-  // If the loaded value is smaller than the available value, then we can
+-  // extract out a piece from it. If the available value is too small, then
+-  // we can't do anything.
+-  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
+-
+-  // Convert source pointers to integers, which can be manipulated.
+-  if (StoredValTy->isPointerTy()) {
+-    StoredValTy = DL->getIntPtrType(StoredValTy->getContext());
+-    Instruction *I = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+-    StoredVal = I;
+-    handleNewInstruction(I);
+-  }
+-
+-  // Convert vectors and fp to integer, which can be manipulated.
+-  if (!StoredValTy->isIntegerTy()) {
+-    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
+-    Instruction *I = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
+-    StoredVal = I;
+-    handleNewInstruction(I);
+-  }
+-
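The narrowing path being deleted boils down to plain integer arithmetic once pointers and vectors have been bitcast away. A sketch with hypothetical bit widths (not part of the patch):

#include <cstdint>

// Model of the shift-then-truncate narrowing below: big-endian targets keep
// the interesting bytes in the high bits, so shift them down first.
static uint64_t narrowStoredValue(uint64_t Stored, unsigned StoreBits,
                                  unsigned LoadBits, bool BigEndian) {
  if (BigEndian)
    Stored >>= (StoreBits - LoadBits);
  uint64_t Mask = (LoadBits >= 64) ? ~0ULL : ((1ULL << LoadBits) - 1);
  return Stored & Mask;
}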
+-  // If this is a big-endian system, we need to shift the value down to the
+-  // low bits so that a truncate will work.
+-  if (DL->isBigEndian()) {
+-    Constant *Val =
+-        ConstantInt::get(StoredVal->getType(), StoreSize - LoadSize);
+-    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
+-    if (Instruction *I = dyn_cast(StoredVal))
+-      handleNewInstruction(I);
+-  }
+-
+-  // Truncate the integer to the right size now.
+-  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+-  Instruction *I = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
+-  StoredVal = I;
+-  handleNewInstruction(I);
+-
+-  if (LoadedTy == NewIntTy)
+-    return StoredVal;
+-
+-  // If the result is a pointer, inttoptr.
+-  if (LoadedTy->isPointerTy()) {
+-    I = new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
+-    handleNewInstruction(I);
+-    return I;
+-  }
+-
+-  // Otherwise, bitcast.
+-  I = new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
+-  handleNewInstruction(I);
+-  return I;
+-}
+-
+-/// GetStoreValueForLoad - This function is called when we have a
+-/// memdep query of a load that ends up being a clobbering store. This means
+-/// that the store provides bits used by the load but the pointers don't
+-/// mustalias. Check this case to see if there is anything more we can do
+-/// before we give up.
+-Value *NewGVN::getStoreValueForLoad(Value *SrcVal, unsigned Offset,
+-                                    Type *LoadTy, Instruction *InsertPt) {
+-
+-  LLVMContext &Ctx = SrcVal->getType()->getContext();
+-
+-  uint64_t StoreSize = (DL->getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+-  uint64_t LoadSize = (DL->getTypeSizeInBits(LoadTy) + 7) / 8;
+-
+-  IRBuilder<> Builder(InsertPt);
+-
+-  // Compute which bits of the stored value are being used by the load. Convert
+-  // to an integer type to start with.
+-  if (SrcVal->getType()->getScalarType()->isPointerTy()) {
+-    SrcVal =
+-        Builder.CreatePtrToInt(SrcVal, DL->getIntPtrType(SrcVal->getType()));
+-    if (Instruction *I = dyn_cast(SrcVal))
+-      handleNewInstruction(I);
+-  }
+-
+-  if (!SrcVal->getType()->isIntegerTy()) {
+-    SrcVal =
+-        Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
+-    if (Instruction *I = dyn_cast(SrcVal))
+-      handleNewInstruction(I);
+-  }
+-  // Shift the bits to the least significant depending on endianness.
+-  unsigned ShiftAmt;
+-  if (DL->isLittleEndian())
+-    ShiftAmt = Offset * 8;
+-  else
+-    ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
+-
+-  if (ShiftAmt) {
+-    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
+-    if (Instruction *I = dyn_cast(SrcVal))
+-      handleNewInstruction(I);
+-  }
+-  if (LoadSize != StoreSize) {
+-    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize * 8));
+-    if (Instruction *I = dyn_cast(SrcVal))
+-      handleNewInstruction(I);
+-  }
+-  return coerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt);
+-}
+-
+-/// GetLoadValueForLoad - This function is called when we have a
+-/// memdep query of a load that ends up being a clobbering load. This means
+-/// that the load *may* provide bits used by the load but we can't be sure
+-/// because the pointers don't mustalias. Check this case to see if there is
+-/// anything more we can do before we give up.
+-Value *NewGVN::getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+-                                   Type *LoadTy, Instruction *InsertPt) {
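The ShiftAmt selection in the deleted getStoreValueForLoad is the endianness-sensitive core. Isolated as a hypothetical byte-based helper, matching the arithmetic above:

// Which bit offset of the stored value feeds the load: little-endian counts
// from the low end of the store, big-endian from the high end.
static unsigned loadShiftAmount(unsigned StoreSizeBytes,
                                unsigned LoadSizeBytes, unsigned OffsetBytes,
                                bool LittleEndian) {
  return LittleEndian
             ? OffsetBytes * 8
             : (StoreSizeBytes - LoadSizeBytes - OffsetBytes) * 8;
}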
+-  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+-  // widen SrcVal out to a larger load.
+-  unsigned SrcValSize = DL->getTypeStoreSize(SrcVal->getType());
+-  unsigned LoadSize = DL->getTypeStoreSize(LoadTy);
+-  if (Offset + LoadSize > SrcValSize) {
+-    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+-    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
+-    // If we have a load/load clobber and DepLI can be widened to cover this
+-    // load, then we should widen it to the next power of 2 size big enough!
+-    unsigned NewLoadSize = Offset + LoadSize;
+-    if (!isPowerOf2_32(NewLoadSize))
+-      NewLoadSize = NextPowerOf2(NewLoadSize);
+-
+-    Value *PtrVal = SrcVal->getPointerOperand();
+-
+-    // Insert the new load after the old load. This ensures that subsequent
+-    // memdep queries will find the new load. We can't easily remove the old
+-    // load completely because it is already in the value numbering table.
+-    IRBuilder<> Builder(SrcVal->getParent(), ++(SrcVal->getIterator()));
+-    Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+-    DestPTy =
+-        PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
+-    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+-    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+-    if (Instruction *I = dyn_cast(PtrVal))
+-      handleNewInstruction(I);
+-    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+-    NewLoad->takeName(SrcVal);
+-    NewLoad->setAlignment(SrcVal->getAlignment());
+-    handleNewInstruction(NewLoad);
+-    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+-    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+-    // This ensures we forward other coercions onto the new load, instead of
+-    // the old one
+-    CoercionForwarding[SrcVal] = NewLoad;
+-
+-    // Replace uses of the original load with the wider load. On a big endian
+-    // system, we need to shift down to get the relevant bits.
+-    Value *RV = NewLoad;
+-    if (DL->isBigEndian()) {
+-      RV = Builder.CreateLShr(
+-          RV, NewLoadSize * 8 - SrcVal->getType()->getPrimitiveSizeInBits());
+-      if (Instruction *I = dyn_cast(RV))
+-        handleNewInstruction(I);
+-    }
+-
+-    RV = Builder.CreateTrunc(RV, SrcVal->getType());
+-    if (Instruction *I = dyn_cast(RV))
+-      handleNewInstruction(I);
+-
+-    // markUsersTouched(SrcVal);
+-    // assert(false && "Need to debug this");
+-    // So, we just widened a load that we will have already gone past in
+-    // elimination, so in order to get rid of the uses, we have to do it here
+-
+-    SrcVal->replaceAllUsesWith(RV);
+-
+-    // We would like to use gvn.markInstructionForDeletion here, but we can't
+-    // because the load is already memoized into the leader map table that GVN
+-    // tracks. It is potentially possible to remove the load from the table,
+-    // but then all of the operations based on it would need to be rehashed.
+-    // Just leave the dead load around.
+-    // FIXME: This is no longer a problem
+-    markInstructionForDeletion(SrcVal);
+-    SrcVal = NewLoad;
+-  }
+-
+-  return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt);
+-}
+-
+-/// GetMemInstValueForLoad - This function is called when we have a
+-/// memdep query of a load that ends up being a clobbering mem intrinsic.
+-Value *NewGVN::getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+-                                      Type *LoadTy, Instruction *InsertPt,
+-                                      bool NoNewInst) {
+-
+-  LLVMContext &Ctx = LoadTy->getContext();
+-  uint64_t LoadSize = DL->getTypeSizeInBits(LoadTy) / 8;
+-
+-  IRBuilder<> Builder(InsertPt);
+-
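The widened-load sizing above rounds Offset + LoadSize up to a power of two. The same computation as a standalone sketch (a simple loop standing in for LLVM's NextPowerOf2):

#include <cstdint>

// The replacement load must cover Offset + LoadSize bytes, rounded up to a
// power-of-two width so it remains a single legal integer load.
static uint64_t widenedLoadSize(uint64_t Offset, uint64_t LoadSize) {
  uint64_t Needed = Offset + LoadSize;
  uint64_t P = 1;
  while (P < Needed)
    P <<= 1;
  return P;
}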
+- if (MemSetInst *MSI = dyn_cast(SrcInst)) { +- // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and +- // independently of what the offset is. +- Value *Val = MSI->getValue(); +- if (LoadSize != 1) { +- if (NoNewInst) +- return nullptr; +- Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize * 8)); +- if (Instruction *I = dyn_cast(Val)) +- handleNewInstruction(I); +- } +- +- Value *OneElt = Val; +- +- // Splat the value out to the right number of bits. +- for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) { +- if (NoNewInst) +- return nullptr; +- +- // If we can double the number of bytes set, do it. +- if (NumBytesSet * 2 <= LoadSize) { +- Value *ShVal = Builder.CreateShl(Val, NumBytesSet * 8); +- if (Instruction *I = dyn_cast(ShVal)) +- handleNewInstruction(I); +- Val = Builder.CreateOr(Val, ShVal); +- if (Instruction *I = dyn_cast(Val)) +- handleNewInstruction(I); +- NumBytesSet <<= 1; +- continue; +- } +- +- // Otherwise insert one byte at a time. +- Value *ShVal = Builder.CreateShl(Val, 1 * 8); +- if (Instruction *I = dyn_cast(ShVal)) +- handleNewInstruction(I); +- +- Val = Builder.CreateOr(OneElt, ShVal); +- if (Instruction *I = dyn_cast(Val)) +- handleNewInstruction(I); +- +- ++NumBytesSet; +- } +- +- return coerceAvailableValueToLoadType(Val, LoadTy, InsertPt); +- } +- +- // Otherwise, this is a memcpy/memmove from a constant global. +- MemTransferInst *MTI = cast(SrcInst); +- Constant *Src = cast(MTI->getSource()); +- unsigned AS = Src->getType()->getPointerAddressSpace(); +- +- // Otherwise, see if we can constant fold a load from the constant with the +- // offset applied as appropriate. +- Src = ConstantExpr::getBitCast( +- Src, llvm::Type::getInt8PtrTy(Src->getContext(), AS)); +- Constant *OffsetCst = +- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); +- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, +- OffsetCst); +- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); +- return ConstantFoldLoadFromConstPtr(Src, LoadTy, *DL); +-} +- +-Value *NewGVN::coerceLoad(Value *V) { +- assert(isa(V) && "Trying to coerce something other than a load"); +- LoadInst *LI = cast(V); +- // This is an offset, source pair +- const std::pair &Info = CoercionInfo.lookup(LI); +- Value *Result; +- Value *RealValue = Info.second; +- // Walk all the coercion fowarding chains, in case this load has already been +- // widened into another load +- while (true) { +- auto ForwardingResult = CoercionForwarding.find(RealValue); +- if (ForwardingResult != CoercionForwarding.end()) +- RealValue = ForwardingResult->second; +- else +- break; +- } +- +- assert(DT->dominates(cast(RealValue), LI) && +- "Trying to replace a load with one that doesn't dominate it"); +- if (StoreInst *DepSI = dyn_cast(RealValue)) +- Result = getStoreValueForLoad(DepSI->getValueOperand(), Info.first, +- LI->getType(), LI); +- else if (LoadInst *DepLI = dyn_cast(RealValue)) +- Result = getLoadValueForLoad(DepLI, Info.first, LI->getType(), LI); +- else if (MemIntrinsic *DepMI = dyn_cast(RealValue)) +- Result = getMemInstValueForLoad(DepMI, Info.first, LI->getType(), LI); +- else +- llvm_unreachable("Unknown coercion type"); +- +- assert(Result && "Should have been able to coerce"); +- DEBUG(dbgs() << "Coerced load " << *LI << " output is " << *Result << "\n"); +- return Result; +-} +- + bool NewGVN::eliminateInstructions(Function &F) { + // This is a non-standard eliminator. 
 bool NewGVN::eliminateInstructions(Function &F) {
   // This is a non-standard eliminator. The normal way to eliminate is
   // to walk the dominator tree in order, keeping track of available
@@ -2960,9 +1663,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
 
     } else {
       DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n");
-      // If this is a singleton, with no equivalences or coercible members, we
-      // can skip it
-      if (CC->Members.size() != 1 || !CC->CoercibleMembers.empty()) {
+      // If this is a singleton, we can skip it.
+      if (CC->Members.size() != 1) {
 
         // This is a stack because equality replacement/etc may place
         // constants in the middle of the member list, and we want to use
@@ -2971,11 +1673,11 @@ bool NewGVN::eliminateInstructions(Function &F) {
 
         ValueDFSStack EliminationStack;
 
-        // Convert the members and equivalences to DFS ordered sets and
+        // Convert the members to DFS ordered sets and
         // then merge them.
         std::vector<ValueDFS> DFSOrderedSet;
-        convertDenseToDFSOrdered(CC->Members, DFSOrderedSet, false);
-        convertDenseToDFSOrdered(CC->CoercibleMembers, DFSOrderedSet, true);
+        convertDenseToDFSOrdered(CC->Members, DFSOrderedSet);
+
         // Sort the whole thing
         sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
 
@@ -2984,7 +1686,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
           int MemberDFSOut = C.DFSOut;
           Value *Member = C.Val;
           Use *MemberUse = C.U;
-          bool Coercible = C.Coercible;
 
           // We ignore void things because we can't get a value from
           // them.
@@ -3027,8 +1728,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
           // Push if we need to
           ShouldPush |= Member && EliminationStack.empty();
           if (ShouldPush) {
-            if (Coercible)
-              Member = coerceLoad(Member);
             EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut);
           }
         }
@@ -3084,630 +1783,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
       MembersLeft.insert(Member);
     }
     CC->Members.swap(MembersLeft);
-    CC->CoercibleMembers.clear();
   }
 
   return AnythingReplaced;
 }
 
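The eliminator these hunks touch relies on one property: a definition A dominates B exactly when A's DFS interval on the dominator tree encloses B's. Sorting a congruence class by (DFSIn, DFSOut) and sweeping with a stack therefore finds each member's nearest dominating leader in one pass. A minimal sketch of that core, with a hypothetical Member type standing in for ValueDFS:

#include <algorithm>
#include <tuple>
#include <vector>

struct Member {
  int DFSIn, DFSOut; // dominator-tree DFS interval of the definition
  int Id;            // stands in for the Value
};

// For each member (in sorted order), the Id it should be replaced with;
// a member that starts a new dominator scope leads itself.
std::vector<int> pickLeaders(std::vector<Member> Members) {
  std::sort(Members.begin(), Members.end(),
            [](const Member &A, const Member &B) {
              return std::tie(A.DFSIn, A.DFSOut) <
                     std::tie(B.DFSIn, B.DFSOut);
            });
  std::vector<Member> Stack;
  std::vector<int> Leaders;
  for (const Member &M : Members) {
    // Pop members whose interval does not enclose M; they cannot dominate it.
    while (!Stack.empty() && !(Stack.back().DFSIn <= M.DFSIn &&
                               M.DFSOut <= Stack.back().DFSOut))
      Stack.pop_back();
    if (Stack.empty())
      Stack.push_back(M); // M opens a new scope and leads it
    Leaders.push_back(Stack.back().Id);
  }
  return Leaders;
}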
non-load!"); +- if (isSimpleValue()) +- return getSimpleValue(); +- else +- return UndefValue::get(I->getType()); +- } +- +- Value *Res; +- Type *LoadTy = I->getType(); +- +- if (isSimpleValue()) { +- Res = getSimpleValue(); +- if (Res->getType() != LoadTy) { +- Res = gvn.getStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator()); +- +- DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " +- << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); +- } +- } else if (isCoercedLoadValue()) { +- LoadInst *Load = getCoercedLoadValue(); +- if (Load->getType() == LoadTy && Offset == 0) { +- Res = Load; +- } else { +- Res = gvn.getLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator()); +- +- DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " +- << *getCoercedLoadValue() << '\n' << *Res << '\n' +- << "\n\n\n"); +- } +- } else if (isMemIntrinValue()) { +- Res = gvn.getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, +- BB->getTerminator()); +- DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset +- << " " << *getMemIntrinValue() << '\n' << *Res << '\n' +- << "\n\n\n"); +- } else { +- assert(isUndefValue() && "Should be UndefVal"); +- DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); +- return UndefValue::get(LoadTy); +- } +- return Res; +-} +-#endif +-Value *NewGVN::constructSSAForSet( +- Instruction *I, SmallVectorImpl &ValuesPerBlock) { +- // Check for the fully redundant, dominating load case. In this case, we can +- // just use the dominating value directly. +- if (ValuesPerBlock.size() == 1 && +- DT->properlyDominates(ValuesPerBlock[0].BB, I->getParent())) { +- assert(!ValuesPerBlock[0].AV.isUndefValue() && +- "Dead BB dominate this block"); +- return ValuesPerBlock[0].MaterializeAdjustedValue(cast(I), *this); +- } +- // Otherwise, we have to construct SSA form. +- SmallVector NewPHIs; +- SSAUpdater SSAUpdate(&NewPHIs); +- SSAUpdate.Initialize(I->getType(), I->getName()); +- +- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { +- const AvailableValueInBlock &AV = ValuesPerBlock[i]; +- BasicBlock *BB = AV.BB; +- +- if (SSAUpdate.HasValueForBlock(BB)) +- continue; +- +- SSAUpdate.AddAvailableValue( +- BB, AV.MaterializeAdjustedValue(cast(I), *this)); +- } +- +- // Perform PHI construction. +- Value *V = SSAUpdate.GetValueInMiddleOfBlock(I->getParent()); +- +- return V; +-} +- +-/// Split the critical edge connecting the given two blocks, and return +-/// the block inserted to the critical edge. 
-/// Split the critical edge connecting the given two blocks, and return
-/// the block inserted to the critical edge.
-BasicBlock *NewGVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
-  BasicBlock *BB =
-      SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
-  if (MD)
-    MD->invalidateCachedPredecessors();
-  return BB;
-}
-
-void NewGVN::valueNumberNewInstructionToValue(Value *New, Value *Old) {
-  if (!ValueToClass.count(New))
-    ValueToClass[New] = InitialClass;
-  const Expression *NewExpr = createVariableExpression(Old, false);
-  performCongruenceFinding(New, NewExpr);
-}
-
-void NewGVN::valueNumberNewInstruction(Value *V) {
-  if (!ValueToClass.count(V))
-    ValueToClass[V] = InitialClass;
-  const Expression *NewExpr =
-      performSymbolicEvaluation(V, cast<Instruction>(V)->getParent());
-  performCongruenceFinding(V, NewExpr);
-}
-
-Value *NewGVN::regenerateExpression(const Expression *E, BasicBlock *BB) {
-#if FIXME
-  Value *V = findPRELeader(E, BB, nullptr);
-  if (V)
-    return V;
-  if (const LoadExpression *LE = dyn_cast<LoadExpression>(E)) {
-    LoadInst *LI = new LoadInst(LE->getOperand(0), "loadpre", false,
-                                LE->getAlignment(), BB->getTerminator());
-    const_cast<LoadExpression *>(LE)->setLoadInst(LI);
-    MSSA->addNewMemoryUse(LI, MemorySSA::InsertionPlace::End);
-
-    return LI;
-  } else if (const BasicExpression *BE = dyn_cast<BasicExpression>(E)) {
-    unsigned Opcode = BE->getOpcode();
-    if (Instruction::isBinaryOp(Opcode)) {
-      BinaryOperator *BO = BinaryOperator::Create(
-          Instruction::BinaryOps(Opcode), BE->getOperand(0), BE->getOperand(1),
-          "binaryoppre", BB->getTerminator());
-      // FIXME: Track NSW/NUW
-      return BO;
-    } else if (Opcode == Instruction::MemoryOps::GetElementPtr) {
-#if FIXME
-      // It wants the pointee type, which is complex, and not equivalent to
-      // getType on the original GEP
-      // FIXME: Store getSrcType on the GEP
-      Type *PointeeType =
-          cast<PointerType>(BE->getOperand(0)->getType()->getScalarType())
-              ->getElementType();
-#endif
-      Value *GEP = nullptr;
-      if (Value *V = SimplifyGEPInst(
-              BE->getType(), makeArrayRef(BE->ops_begin(), BE->ops_end()), *DL,
-              TLI, DT, AC))
-        GEP = V;
-      else
-        GEP = GetElementPtrInst::Create(
-            BE->getType(), BE->getOperand(0),
-            makeArrayRef(BE->ops_begin(), BE->ops_end()).slice(1), "geppre",
-            BB->getTerminator());
-      // FIXME: Track inbounds
-      return GEP;
-    } else if (((Opcode & 0xff00) >> 8) == Instruction::ICmp) {
-      CmpInst::Predicate Pred = (CmpInst::Predicate)(Opcode & 0xff);
-      ICmpInst *Cmp = new ICmpInst(BB->getTerminator(), Pred, BE->getOperand(0),
-                                   BE->getOperand(1), "icmppre");
-      return Cmp;
-    } else if (((Opcode & 0xff00) >> 8) == Instruction::FCmp) {
-      CmpInst::Predicate Pred = (CmpInst::Predicate)(Opcode & 0xff);
-      FCmpInst *Cmp = new FCmpInst(BB->getTerminator(), Pred, BE->getOperand(0),
-                                   BE->getOperand(1), "fcmppre");
-      return Cmp;
-    } else
-      llvm_unreachable("What!");
-  }
-  llvm_unreachable("What!");
-#endif
-  return nullptr;
-}
-
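The (Opcode & 0xff00) >> 8 tests in regenerateExpression undo NewGVN's packing of comparisons, which stores the real opcode in the high byte of an expression's opcode field and the compare predicate in the low byte. In isolation, as a sketch:

// Pack/unpack scheme used for compare expressions, e.g.
// packCmpOpcode(Instruction::ICmp, CmpInst::ICMP_EQ). The low byte is wide
// enough for every CmpInst::Predicate value.
unsigned packCmpOpcode(unsigned Opcode, unsigned Predicate) {
  return (Opcode << 8) | Predicate;
}
unsigned unpackOpcode(unsigned Packed) { return (Packed & 0xff00) >> 8; }
unsigned unpackPredicate(unsigned Packed) { return Packed & 0xff; }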
-bool NewGVN::performPRE(Instruction *I, AvailValInBlkVect &ValuesPerBlock,
-                        UnavailBlkVect &UnavailableBlocks) {
-  bool Changed = false;
-  // Okay, we have *some* definitions of the value. This means that the value
-  // is available in some of our (transitive) predecessors. Let's think about
-  // doing PRE of this instruction. This will involve inserting a new
-  // instruction into the predecessor when it's not available. We could do
-  // this in general, but prefer to not increase code size. As such, we only
-  // do this when we know that we only have to insert *one* load (which means
-  // we're basically moving the load, not inserting a new one).
-
-  // Decide whether PRE is profitable for this load.
-  unsigned NumUnavailablePreds = UnavailableBlocks.size();
-  assert(NumUnavailablePreds != 0 &&
-         "Fully available value should already be eliminated!");
-
-  // If this load is unavailable in multiple predecessors, reject it.
-  // FIXME: If we could restructure the CFG, we could make a common pred with
-  // all the preds that don't have an available LI and insert a new load into
-  // that one block.
-  if (NumUnavailablePreds > 2)
-    return false;
-
-  // Check if the load can safely be moved to all the unavailable predecessors.
-  bool CanDoPRE = true;
-
-  SmallVector<Instruction *, 8> NewInsts;
-  // FIXME: Insert Can Be Avail stuff here
-  if (!CanDoPRE) {
-    while (!NewInsts.empty()) {
-      Instruction *I = NewInsts.pop_back_val();
-      if (MD)
-        MD->removeInstruction(I);
-      I->eraseFromParent();
-    }
-    // HINT: Don't revert the edge-splitting, as the following transformation
-    // may also need to split these critical edges.
-    return Changed;
-  }
-
-  // Okay, we can eliminate this load by inserting a reload in the predecessor
-  // and using PHI construction to get the value in the other predecessors, do
-  // it.
-  DEBUG(dbgs() << "GVN REMOVING PRE INSTRUCTION: " << *I << '\n');
-  DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
-                                      << " INSTS: " << *NewInsts.back()
-                                      << '\n');
-
-  // Assign value numbers to the new instructions.
-  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
-    valueNumberNewInstruction(NewInsts[i]);
-  }
-
-  for (auto &UnavailInfo : UnavailableBlocks)
-    if (!UnavailInfo.second)
-      return false;
-
-  for (auto &UnavailInfo : UnavailableBlocks) {
-    BasicBlock *UnavailablePred = UnavailInfo.first;
-
-    Value *NewVal = regenerateExpression(UnavailInfo.second, UnavailablePred);
-    valueNumberNewInstruction(NewVal);
-    if (Instruction *NewInst = dyn_cast<Instruction>(NewVal)) {
-      NewInst->setDebugLoc(I->getDebugLoc());
-      LoadInst *LI = dyn_cast<LoadInst>(I);
-      LoadInst *NewLoadInst = dyn_cast<LoadInst>(NewInst);
-      // Transfer load tags.
-      if (LI && NewLoadInst) {
-        AAMDNodes Tags;
-        LI->getAAMetadata(Tags);
-        if (Tags)
-          NewLoadInst->setAAMetadata(Tags);
-        if (MD)
-          MD->invalidateCachedPointerInfo(NewLoadInst->getPointerOperand());
-      }
-    }
-
-    // Add the newly created load.
-    ValuesPerBlock.push_back(
-        AvailableValueInBlock::get(UnavailablePred, NewVal));
-    DEBUG(dbgs() << "GVN INSERTED " << *NewVal << '\n');
-  }
-
-  // Perform PHI construction.
-  Value *V = constructSSAForSet(I, ValuesPerBlock);
-  I->replaceAllUsesWith(V);
-  PREValueForwarding[I] = V;
-  // The value of V is the same as the value of the old instruction (or it
-  // would not be redundant).
-  valueNumberNewInstructionToValue(V, I);
-
-  ValueToExpression[I] = ValueToExpression[V];
-  // I no longer exists.
-  ValueToClass.lookup(I)->Members.erase(I);
-  if (isa<PHINode>(V))
-    V->takeName(I);
-  if (MD && V->getType()->getScalarType()->isPointerTy())
-    MD->invalidateCachedPointerInfo(V);
-  markInstructionForDeletion(I);
-  ++NumPRELoad;
-  return true;
-}
-
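The profitability gate in the deleted performPRE reduces to a small predicate: zero unavailable predecessors means the value was fully redundant and belongs to the full-redundancy path, while more than two means the transform would start duplicating code rather than moving it. As a sketch (helper name is hypothetical):

bool isPREProfitable(unsigned NumUnavailablePreds) {
  // Fully available values are handled by the full-redundancy elimination,
  // so PRE should never see zero unavailable predecessors.
  if (NumUnavailablePreds == 0)
    return false;
  // Inserting a copy into more than two predecessors grows code size.
  return NumUnavailablePreds <= 2;
}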
-void NewGVN::analyzeAvailability(Instruction *I,
-                                 AvailValInBlkVect &ValuesPerBlock,
-                                 UnavailBlkVect &UnavailableBlocks) {
-  for (auto P : PredCache.get(I->getParent())) {
-    if (!ReachableBlocks.count(P)) {
-      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(P));
-      continue;
-    }
-    const Expression *Before = ValueToExpression.lookup(I);
-    // This can happen if the expression was too complex for GVN to analyze.
-    if (!Before)
-      return;
-    const Expression *E = phiTranslateExpression(Before, I->getParent(), P, I);
-    Value *V = nullptr;
-    if (E) {
-      E = trySimplifyPREExpression(E, P);
-      DEBUG(dbgs() << "After simplification, expression is " << *E << "\n");
-
-      V = findPRELeader(E, P, I);
-      // If we got a store, we can rip out the source value.
-      // Otherwise, void type stuff is not acceptable.
-      if (V && V->getType()->isVoidTy()) {
-        if (StoreInst *SI = dyn_cast<StoreInst>(V))
-          V = SI->getValueOperand();
-        else
-          V = nullptr;
-      }
-    }
-
-    if (!V)
-      UnavailableBlocks.push_back({P, E});
-    else
-      ValuesPerBlock.push_back(AvailableValueInBlock::get(P, V));
-  }
-}
-
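analyzeAvailability fills two vectors whose sizes, compared against the predecessor count, classify the candidate, and performPREOnClass below dispatches on that classification. As a standalone decision function (assumed counts, plain C++, not LLVM API):

enum class Redundancy { None, Partial, Full };

Redundancy classifyCandidate(unsigned NumPreds, unsigned NumAvailable,
                             unsigned NumUnavailable) {
  // Some predecessor could not be analyzed at all (phi translation yielded
  // neither an available value nor an unavailable-block entry).
  if (NumAvailable + NumUnavailable != NumPreds)
    return Redundancy::None;
  // No predecessor produces the value; there is nothing to merge.
  if (NumAvailable == 0)
    return Redundancy::None;
  return NumUnavailable == 0 ? Redundancy::Full : Redundancy::Partial;
}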
-bool NewGVN::performPREOnClass(CongruenceClass *CC) {
-
-  // FIXME: Only do some
-  bool Changed = false;
-  AvailValInBlkVect ValuesPerBlock;
-
-  for (auto M : CC->Members) {
-    if (isa(M) || M->getType()->isVoidTy())
-      continue;
-    if (!isa(M) && !isa(M))
-      continue;
-
-    if (Instruction *I = dyn_cast<Instruction>(M)) {
-      UnavailBlkVect UnavailableBlocks;
-      BasicBlock *IBlock = I->getParent();
-      if (PredCache.size(IBlock) == 0)
-        continue;
-      analyzeAvailability(I, ValuesPerBlock, UnavailableBlocks);
-
-      if ((UnavailableBlocks.size() + ValuesPerBlock.size()) !=
-          PredCache.size(IBlock))
-        continue;
-#if 1
-      // If we have no predecessors that produce a known value for this load,
-      // exit early.
-      if (ValuesPerBlock.empty())
-        continue;
-#endif
-      // Step 3: Eliminate full redundancy.
-      //
-      // If all of the instructions we depend on produce a known value for
-      // this load, then it is fully redundant and we can use PHI insertion
-      // to compute its value. Insert PHIs and remove the fully redundant
-      // value now.
-      if (UnavailableBlocks.empty()) {
-        DEBUG(dbgs() << "GVN REMOVING INSTRUCTION: " << *I << '\n');
-
-        // Perform PHI construction.
-        Value *V = constructSSAForSet(I, ValuesPerBlock);
-        I->replaceAllUsesWith(V);
-        PREValueForwarding[I] = V;
-        valueNumberNewInstruction(V);
-        ValueToClass.lookup(I)->Members.erase(I);
-        ValueToExpression[I] = ValueToExpression[V];
-        if (isa<PHINode>(V))
-          V->takeName(I);
-        if (MD && V->getType()->getScalarType()->isPointerTy())
-          MD->invalidateCachedPointerInfo(V);
-        markInstructionForDeletion(I);
-        ++NumNewGVNPRE;
-        Changed = true;
-        continue;
-      }
-      // Step 4: Eliminate partial redundancy.
-      if (!EnablePRE || !EnableLoadPRE)
-        continue;
-      Changed |= performPRE(I, ValuesPerBlock, UnavailableBlocks);
-    }
-  }
-  return Changed;
-}
-
-// Find a leader for Op in BB.
-Value *NewGVN::findPRELeader(Value *Op, const BasicBlock *BB,
-                             const Value *MustDominate) {
-  if (alwaysAvailable(Op))
-    return Op;
-
-  CongruenceClass *CC = ValueToClass[Op];
-  if (!CC || CC == InitialClass)
-    return nullptr;
-
-  if (CC->RepLeader && alwaysAvailable(CC->RepLeader))
-    return CC->RepLeader;
-  for (auto M : CC->Members) {
-    if (M == MustDominate)
-      continue;
-    if (Instruction *I = dyn_cast<Instruction>(M))
-      if (DT->dominates(I->getParent(), BB))
-        return I;
-  }
-  return nullptr;
-}
-
-// Find a leader for E in BB.
-Value *NewGVN::findPRELeader(const Expression *E, const BasicBlock *BB,
-                             const Value *MustDominate) {
-  if (const ConstantExpression *CE = dyn_cast<ConstantExpression>(E))
-    return CE->getConstantValue();
-  else if (const VariableExpression *VE = dyn_cast<VariableExpression>(E))
-    return findPRELeader(VE->getVariableValue(), BB, MustDominate);
-
-  DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n");
-
-  const auto Result = ExpressionToClass.find(E);
-  if (Result == ExpressionToClass.end())
-    return nullptr;
-
-  CongruenceClass *CC = Result->second;
-
-  if (!CC || CC == InitialClass)
-    return nullptr;
-
-  if (CC->RepLeader &&
-      (isa(CC->RepLeader) || isa(CC->RepLeader) ||
-       isa(CC->RepLeader)))
-    return CC->RepLeader;
-
-  for (auto M : CC->Members) {
-    if (M == MustDominate)
-      continue;
-    if (Instruction *I = dyn_cast<Instruction>(M))
-      if (DT->dominates(I->getParent(), BB))
-        return I;
-  }
-  return nullptr;
-}
-
-MemoryAccess *NewGVN::phiTranslateMemoryAccess(MemoryAccess *MA,
-                                               const BasicBlock *Pred) {
-  if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
-    for (const auto &A : MP->operands()) {
-      if (MP->getIncomingBlock(A) == Pred) {
-        return cast<MemoryAccess>(A);
-      }
-    }
-    // We should have found something.
-    return nullptr;
-  }
-  return MA;
-}
-
-static Value *phiTranslateValue(Value *Incoming, const BasicBlock *Pred) {
-  // Back translate if defined by a phi in this block.
-  PHINode *P = dyn_cast<PHINode>(Incoming);
-  int Index = P->getBasicBlockIndex(Pred);
-  if (Index != -1) {
-    return P->getIncomingValue(Index);
-  }
-  // Not defined by a phi in this block.
-  return Incoming;
-}
-
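phiTranslateMemoryAccess above is the MemorySSA half of phi translation: moving a query from a block to one predecessor replaces a MemoryPhi with its incoming access for that edge, and anything else is unaffected by the choice of predecessor. The core of it, as a sketch using index-based MemoryPhi accessors (the MemorySSA header lived under Transforms/Utils at the time of this patch):

#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;

MemoryAccess *translateAcrossEdge(MemoryAccess *MA, const BasicBlock *Pred) {
  if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
    for (unsigned I = 0, E = MP->getNumIncomingValues(); I != E; ++I)
      if (MP->getIncomingBlock(I) == Pred)
        return MP->getIncomingValue(I);
    return nullptr; // Pred is not actually an incoming edge.
  }
  return MA; // Defs and uses pass through unchanged.
}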
-bool NewGVN::phiTranslateArguments(const BasicExpression *From,
-                                   BasicExpression *To, const BasicBlock *Pred,
-                                   const Value *MustDominate) {
-  for (unsigned i = 0, e = From->getNumOperands(); i != e; ++i) {
-    Value *Arg = From->getOperand(i);
-    Value *Forwarded = PREValueForwarding.lookup(Arg);
-    if (Forwarded)
-      Arg = Forwarded;
-    // Fold in immediate adds.
-    bool processedAdd = false;
-    if (Instruction *I = dyn_cast<Instruction>(Arg)) {
-      if (I->getOpcode() == Instruction::Add &&
-          isa<PHINode>(I->getOperand(0)) &&
-          isa<ConstantInt>(I->getOperand(1))) {
-
-        Constant *RHS = cast<Constant>(I->getOperand(1));
-        bool isNSW = cast<BinaryOperator>(I)->hasNoSignedWrap();
-        bool isNUW = cast<BinaryOperator>(I)->hasNoUnsignedWrap();
-        Value *LHS = phiTranslateValue(I->getOperand(0), Pred);
-        // If the PHI translated LHS is an add of a constant, fold the
-        // immediates.
-        if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
-          if (BOp->getOpcode() == Instruction::Add)
-            if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
-              LHS = BOp->getOperand(0);
-              RHS = ConstantExpr::getAdd(RHS, CI);
-              isNSW = isNUW = false;
-            }
-        // FIXME: Handle NSW.
-        const Expression *BinExpr = createBinaryExpression(
-            I->getOpcode(), I->getType(), LHS, RHS, Pred);
-        Value *Leader = findPRELeader(BinExpr, Pred, MustDominate);
-        if (Leader) {
-          Arg = Leader;
-          processedAdd = true;
-        }
-      }
-    }
-    // If Arg is still a phi node, and wasn't processed by the add processing
-    // above, translate it now.
-    if (!processedAdd && isa<PHINode>(Arg))
-      Arg = phiTranslateValue(Arg, Pred);
-
-    Value *Leader = findPRELeader(Arg, Pred, MustDominate);
-    if (Leader == nullptr)
-      return false;
-    To->setOperand(i, Leader);
-  }
-  return true;
-}
-
-const Expression *NewGVN::phiTranslateExpression(const Expression *E,
-                                                 BasicBlock *Curr,
-                                                 BasicBlock *Pred,
-                                                 const Value *MustDominate) {
-  const Expression *ResultExpr = nullptr;
-  if (const LoadExpression *LE = dyn_cast<LoadExpression>(E)) {
-    MemoryAccess *MA = phiTranslateMemoryAccess(LE->getDefiningAccess(), Pred);
-    if (!MA)
-      return nullptr;
-    LoadExpression *NLE = createLoadExpression(LE->getType(), LE->getOperand(0),
-                                               LE->getLoadInst(), MA, Pred);
-    if (!phiTranslateArguments(LE, NLE, Pred, MustDominate))
-      return nullptr;
-    LoadInst *LI = LE->getLoadInst();
-    MemoryLocation Loc;
-    if (LI) {
-      Loc = MemoryLocation::get(LI);
-      Loc = Loc.getWithNewPtr(NLE->getOperand(0));
-    } else {
-      Loc.Ptr = NLE->getOperand(0);
-    }
-    MA = MSSAWalker->getClobberingMemoryAccess(MA, Loc);
-    NLE->setDefiningAccess(MA);
-    ResultExpr = NLE;
-  } else if (const BasicExpression *BE = dyn_cast<BasicExpression>(E)) {
-    BasicExpression *NBE =
-        new (ExpressionAllocator) BasicExpression(BE->getNumOperands());
-    NBE->setType(BE->getType());
-    NBE->setOpcode(BE->getOpcode());
-    NBE->allocateOperands(ArgRecycler, ExpressionAllocator);
-    for (unsigned i = 0, e = BE->getNumOperands(); i != e; ++i)
-      NBE->ops_push_back(nullptr);
-    if (!phiTranslateArguments(BE, NBE, Pred, MustDominate))
-      return nullptr;
-    ResultExpr = NBE;
-  }
-
-  return ResultExpr;
-}
-
-const Expression *NewGVN::trySimplifyPREExpression(const Expression *E,
-                                                   const BasicBlock *B) {
-  const Expression *ResultExpr = E;
-  // This must come first, because LoadExpressions are BasicExpressions.
-  if (const LoadExpression *LE = dyn_cast<LoadExpression>(E)) {
-    MemoryDef *MD = dyn_cast<MemoryDef>(LE->getDefiningAccess());
-    if (MD && !MSSA->isLiveOnEntryDef(MD)) {
-      const Expression *Temp = performSymbolicLoadCoercion(
-          LE->getType(), LE->getOperand(0), LE->getLoadInst(),
-          MD->getMemoryInst(), MD, B);
-      if (Temp)
-        ResultExpr = Temp;
-    }
-  } else if (const BasicExpression *BE = dyn_cast<BasicExpression>(E)) {
-    unsigned Opcode = BE->getOpcode();
-    if (Instruction::isBinaryOp(Opcode)) {
-      Value *V = SimplifyBinOp(Opcode, BE->getOperand(0), BE->getOperand(1),
-                               *DL, TLI, DT, AC);
-      if (V) {
-        if (Constant *C = dyn_cast<Constant>(V))
-          ResultExpr = createConstantExpression(C, false);
-        else if (alwaysAvailable(V))
-          ResultExpr = createVariableExpression(V, false);
-      }
-    } else if (Opcode == Instruction::GetElementPtr) {
-      Value *V = SimplifyGEPInst(BE->getType(),
-                                 makeArrayRef(BE->ops_begin(), BE->ops_end()),
-                                 *DL, TLI, DT, AC);
-      if (V) {
-        if (Constant *C = dyn_cast<Constant>(V))
-          ResultExpr = createConstantExpression(C, false);
-        else if (alwaysAvailable(V))
-          ResultExpr = createVariableExpression(V, false);
-      }
-    }
-  }
-
-  return ResultExpr;
-}
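The immediate-add folding in phiTranslateArguments above is the usual PHITransAddr-style rewrite: when translating "LHS + C1" turns LHS into "X + C2", the expression becomes "X + (C1 + C2)" and the wrap flags must be dropped. On plain integers, as a sketch (the struct is an illustrative stand-in for the IR, not part of the patch):

struct TranslatedAdd {
  int Base;      // identifies the non-constant operand after translation
  long long Imm; // folded immediate
  bool HasNSW, HasNUW;
};

TranslatedAdd foldImmediateAdd(TranslatedAdd LHS, long long C1) {
  // (Base + Imm) + C1 ==> Base + (Imm + C1). Reassociation can move where
  // overflow happens, so the wrap flags no longer hold.
  return {LHS.Base, LHS.Imm + C1, false, false};
}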
Index: test/Transforms/NewGVN/completeness.ll
===================================================================
--- /dev/null
+++ test/Transforms/NewGVN/completeness.ll
@@ -0,0 +1,389 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test1(i32, i8**) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK:         br label [[TMP6:%.*]]
+; CHECK:         br label [[TMP6]]
+; CHECK:         [[TMP7:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ]
+; CHECK-NEXT:    ret i32 [[TMP7]]
+;
+  %3 = icmp ne i32 %0, 0
+  br i1 %3, label %4, label %5
+
+;