Index: lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- lib/Transforms/Scalar/NewGVN.cpp +++ lib/Transforms/Scalar/NewGVN.cpp @@ -81,6 +81,10 @@ STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN"); +STATISTIC(NumGVNLeaderChanges, "Number of leader changes"); +STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes"); +STATISTIC(NumGVNAvoidedSortedLeaderChanges, + "Number of avoided sorted leader changes"); //===----------------------------------------------------------------------===// // GVN Pass @@ -139,6 +143,10 @@ // This is used so we can detect store equivalence changes properly. int StoreCount = 0; + // The most dominating leader after our current leader, because the member set + // is not sorted and is expensive to keep sorted all the time. + std::pair NextLeader = {nullptr, ~0}; + explicit CongruenceClass(unsigned ID) : ID(ID) {} CongruenceClass(unsigned ID, Value *Leader, const Expression *E) : ID(ID), RepLeader(Leader), DefiningExpr(E) {} @@ -320,8 +328,8 @@ // Templated to allow them to work both on BB's and BB-edges. template Value *lookupOperandLeader(Value *, const User *, const T &) const; - void performCongruenceFinding(Value *, const Expression *); - void moveValueToNewCongruenceClass(Value *, CongruenceClass *, + void performCongruenceFinding(Instruction *, const Expression *); + void moveValueToNewCongruenceClass(Instruction *, CongruenceClass *, CongruenceClass *); // Reachability handling. void updateReachableEdge(BasicBlock *, BasicBlock *); @@ -1056,20 +1064,41 @@ // Move a value, currently in OldClass, to be part of NewClass // Update OldClass for the move (including changing leaders, etc) -void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass, +void NewGVN::moveValueToNewCongruenceClass(Instruction *I, + CongruenceClass *OldClass, CongruenceClass *NewClass) { - DEBUG(dbgs() << "New congruence class for " << V << " is " << NewClass->ID + DEBUG(dbgs() << "New congruence class for " << I << " is " << NewClass->ID << "\n"); - OldClass->Members.erase(V); - NewClass->Members.insert(V); - if (isa(V)) { + + if (I == OldClass->NextLeader.first) + OldClass->NextLeader = {nullptr, ~0}; + + // The new instruction and new class leader may either be siblings in the + // dominator tree, or the new class leader should dominate the new member + // instruction. We simply check that the member instruction does not properly + // dominate the new class leader. + assert( + !isa(NewClass->RepLeader) || !NewClass->RepLeader || + I == NewClass->RepLeader || + !DT->properlyDominates( + I->getParent(), cast(NewClass->RepLeader)->getParent())); + + if (NewClass->RepLeader != I) { + auto DFSNum = InstrDFS.lookup(I); + if (DFSNum < NewClass->NextLeader.second) + NewClass->NextLeader = {I, DFSNum}; + } + + OldClass->Members.erase(I); + NewClass->Members.insert(I); + if (isa(I)) { --OldClass->StoreCount; assert(OldClass->StoreCount >= 0); ++NewClass->StoreCount; assert(NewClass->StoreCount > 0); } - ValueToClass[V] = NewClass; + ValueToClass[I] = NewClass; // See if we destroyed the class or need to swap leaders. if (OldClass->Members.empty() && OldClass != InitialClass) { if (OldClass->DefiningExpr) { @@ -1078,25 +1107,48 @@ << " from table\n"); ExpressionToClass.erase(OldClass->DefiningExpr); } - } else if (OldClass->RepLeader == V) { + } else if (OldClass->RepLeader == I) { // When the leader changes, the value numbering of // everything may change due to symbolization changes, so we need to // reprocess. - OldClass->RepLeader = *(OldClass->Members.begin()); + DEBUG(dbgs() << "Leader change!\n"); + ++NumGVNLeaderChanges; + // We don't need to sort members if there is only 1, and we don't care about + // sorting the initial class because everything either gets out of it or is + // unreachable. + if (OldClass->Members.size() == 1 || OldClass == InitialClass) { + OldClass->RepLeader = *(OldClass->Members.begin()); + } else if (OldClass->NextLeader.first) { + ++NumGVNAvoidedSortedLeaderChanges; + OldClass->RepLeader = OldClass->NextLeader.first; + OldClass->NextLeader = {nullptr, ~0}; + } else { + ++NumGVNSortedLeaderChanges; + // TODO: If this ends up to slow, we can maintain a dual structure for + // member testing/insertion, or keep things mostly sorted, and sort only + // here, or .... + std::pair MinDFS = {nullptr, ~0}; + for (const auto X : OldClass->Members) { + auto DFSNum = InstrDFS.lookup(X); + if (DFSNum < MinDFS.second) + MinDFS = {X, DFSNum}; + } + OldClass->RepLeader = MinDFS.first; + } markLeaderChangeTouched(OldClass); } } // Perform congruence finding on a given value numbering expression. -void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { - ValueToExpression[V] = E; +void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { + ValueToExpression[I] = E; // This is guaranteed to return something, since it will at least find // INITIAL. - CongruenceClass *VClass = ValueToClass[V]; - assert(VClass && "Should have found a vclass"); + CongruenceClass *IClass = ValueToClass[I]; + assert(IClass && "Should have found a IClass"); // Dead classes should have been eliminated from the mapping. - assert(!VClass->Dead && "Found a dead class"); + assert(!IClass->Dead && "Found a dead class"); CongruenceClass *EClass; if (const auto *VE = dyn_cast(E)) { @@ -1118,13 +1170,13 @@ NewClass->RepLeader = lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); } else { - NewClass->RepLeader = V; + NewClass->RepLeader = I; } assert(!isa(E) && "VariableExpression should have been handled already"); EClass = NewClass; - DEBUG(dbgs() << "Created new congruence class for " << *V + DEBUG(dbgs() << "Created new congruence class for " << *I << " using expression " << *E << " at " << NewClass->ID << " and leader " << *(NewClass->RepLeader) << "\n"); DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n"); @@ -1140,36 +1192,32 @@ assert(!EClass->Dead && "We accidentally looked up a dead class"); } } - bool ClassChanged = VClass != EClass; - bool LeaderChanged = LeaderChanges.erase(V); + bool ClassChanged = IClass != EClass; + bool LeaderChanged = LeaderChanges.erase(I); if (ClassChanged || LeaderChanged) { DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E << "\n"); if (ClassChanged) - - moveValueToNewCongruenceClass(V, VClass, EClass); - - - markUsersTouched(V); - if (auto *I = dyn_cast(V)) { - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { - // If this is a MemoryDef, we need to update the equivalence table. If - // we determined the expression is congruent to a different memory - // state, use that different memory state. If we determined it didn't, - // we update that as well. Right now, we only support store - // expressions. - if (!isa(MA) && isa(E) && - EClass->Members.size() != 1) { - auto *DefAccess = cast(E)->getDefiningAccess(); - setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); - } else { - setMemoryAccessEquivTo(MA, nullptr); - } - markMemoryUsersTouched(MA); + moveValueToNewCongruenceClass(I, IClass, EClass); + + markUsersTouched(I); + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { + // If this is a MemoryDef, we need to update the equivalence table. If + // we determined the expression is congruent to a different memory + // state, use that different memory state. If we determined it didn't, + // we update that as well. Right now, we only support store + // expressions. + if (!isa(MA) && isa(E) && + EClass->Members.size() != 1) { + auto *DefAccess = cast(E)->getDefiningAccess(); + setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); + } else { + setMemoryAccessEquivTo(MA, nullptr); } + markMemoryUsersTouched(MA); } - } else if (StoreInst *SI = dyn_cast(V)) { + } else if (StoreInst *SI = dyn_cast(I)) { // There is, sadly, one complicating thing for stores. Stores do not // produce values, only consume them. However, in order to make loads and // stores value number the same, we ignore the value operand of the store. @@ -1511,7 +1559,8 @@ // Verify the that the memory equivalence table makes sense relative to the // congruence classes. Note that this checking is not perfect, and is currently -// subject to very rare false negatives. It is only useful for testing/debugging. +// subject to very rare false negatives. It is only useful for testing and +// debugging. void NewGVN::verifyMemoryCongruency() const { // Anything equivalent in the memory access table should be in the same // congruence class. @@ -1540,11 +1589,11 @@ auto *SecondMUD = dyn_cast(KV.second); if (FirstMUD && SecondMUD) assert((singleReachablePHIPath(FirstMUD, SecondMUD) || - ValueToClass.lookup(FirstMUD->getMemoryInst()) == - ValueToClass.lookup(SecondMUD->getMemoryInst())) && - "The instructions for these memory operations should have " - "been in the same congruence class or reachable through" - "a single argument phi"); + ValueToClass.lookup(FirstMUD->getMemoryInst()) == + ValueToClass.lookup(SecondMUD->getMemoryInst())) && + "The instructions for these memory operations should have " + "been in the same congruence class or reachable through" + "a single argument phi"); } else if (auto *FirstMP = dyn_cast(KV.first)) { // We can only sanely verify that MemoryDefs in the operand list all have Index: test/Transforms/NewGVN/pr31613.ll =================================================================== --- /dev/null +++ test/Transforms/NewGVN/pr31613.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +;; Both of these tests are tests of phi nodes that end up all equivalent to each other +;; Without proper leader ordering, we will end up cycling the leader between all of them and never converge. + +define void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 1, [[BB18:%.*]] ] +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: br i1 undef, label [[BB18]], label [[BB7:%.*]] +; CHECK: bb7: +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb9: +; CHECK-NEXT: br i1 undef, label [[BB2]], label [[BB11:%.*]] +; CHECK: bb11: +; CHECK-NEXT: br i1 undef, label [[BB16:%.*]], label [[BB14:%.*]] +; CHECK: bb14: +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb16: +; CHECK-NEXT: br label [[BB7]] +; CHECK: bb18: +; CHECK-NEXT: br label [[BB1]] +; +bb: + br label %bb1 + +bb1: ; preds = %bb18, %bb + %tmp = phi i32 [ 0, %bb ], [ 1, %bb18 ] + br label %bb2 + +bb2: ; preds = %bb9, %bb1 + %tmp3 = phi i32 [ %tmp, %bb1 ], [ %tmp8, %bb9 ] + br label %bb4 + +bb4: ; preds = %bb14, %bb2 + %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp15, %bb14 ] + br i1 undef, label %bb18, label %bb7 + +bb7: ; preds = %bb16, %bb4 + %tmp8 = phi i32 [ %tmp17, %bb16 ], [ %tmp5, %bb4 ] + br label %bb9 + +bb9: ; preds = %bb7 + br i1 undef, label %bb2, label %bb11 + +bb11: ; preds = %bb9 + br i1 undef, label %bb16, label %bb14 + +bb14: ; preds = %bb11 + %tmp15 = phi i32 [ %tmp8, %bb11 ] + br label %bb4 + +bb16: ; preds = %bb11 + %tmp17 = phi i32 [ %tmp8, %bb11 ] + br label %bb7 + +bb18: ; preds = %bb4 + br label %bb1 +} +; ModuleID = 'pr31613-3.ll' + +%struct.a = type {} +%struct.b = type {} + +declare void @c.d.p(i64, i8*) + +define void @e() { +; CHECK-LABEL: @e( +; CHECK-NEXT: [[F:%.*]] = alloca i32 +; CHECK-NEXT: store i32 undef, i32* [[F]], !g !0 +; CHECK-NEXT: br label [[H:%.*]] +; CHECK: h: +; CHECK-NEXT: call void @c.d.p(i64 8, i8* undef) +; CHECK-NEXT: [[I:%.*]] = load i32, i32* [[F]] +; CHECK-NEXT: [[J:%.*]] = load i32, i32* null +; CHECK-NEXT: [[K:%.*]] = icmp eq i32 [[I]], [[J]] +; CHECK-NEXT: br i1 [[K]], label [[L:%.*]], label [[Q:%.*]] +; CHECK: l: +; CHECK-NEXT: br label [[R:%.*]] +; CHECK: q: +; CHECK-NEXT: [[M:%.*]] = load [[STRUCT_A*:%.*]], [[STRUCT_A**:%.*]] null +; CHECK-NEXT: br label [[R]] +; CHECK: r: +; CHECK-NEXT: switch i32 undef, label [[N:%.*]] [ +; CHECK-NEXT: i32 0, label [[S:%.*]] +; CHECK-NEXT: ] +; CHECK: s: +; CHECK-NEXT: store i32 undef, i32* [[F]], !g !0 +; CHECK-NEXT: br label [[H]] +; CHECK: n: +; CHECK-NEXT: [[O:%.*]] = load [[STRUCT_A*]], [[STRUCT_A**]] null +; CHECK-NEXT: ret void +; + %f = alloca i32 + store i32 undef, i32* %f, !g !0 + br label %h + +h: ; preds = %s, %0 + call void @c.d.p(i64 8, i8* undef) + %i = load i32, i32* %f + %j = load i32, i32* null + %k = icmp eq i32 %i, %j + br i1 %k, label %l, label %q + +l: ; preds = %h + br label %r + +q: ; preds = %h + %m = load %struct.a*, %struct.a** null + %1 = bitcast %struct.a* %m to %struct.b* + br label %r + +r: ; preds = %q, %l + switch i32 undef, label %n [ + i32 0, label %s + ] + +s: ; preds = %r + store i32 undef, i32* %f, !g !0 + br label %h + +n: ; preds = %r + %o = load %struct.a*, %struct.a** null + %2 = bitcast %struct.a* %o to %struct.b* + ret void +} + +!0 = !{}