Index: llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp +++ llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp @@ -108,6 +108,7 @@ "Number of avoided sorted leader changes"); STATISTIC(NumGVNNotMostDominatingLeader, "Number of times a member dominated it's new classes' leader"); +STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated"); //===----------------------------------------------------------------------===// // GVN Pass @@ -362,6 +363,7 @@ CongruenceClass *); bool setMemoryAccessEquivTo(MemoryAccess *From, CongruenceClass *To); MemoryAccess *lookupMemoryAccessEquiv(MemoryAccess *) const; + bool isMemoryAccessTop(const MemoryAccess *) const; // Reachability handling. void updateReachableEdge(BasicBlock *, BasicBlock *); @@ -371,8 +373,10 @@ // Elimination. struct ValueDFS; - void convertDenseToDFSOrdered(CongruenceClass::MemberSet &, + void convertDenseToDFSOrdered(const CongruenceClass::MemberSet &, SmallVectorImpl &); + void convertDenseToLoadsAndStores(const CongruenceClass::MemberSet &, + SmallVectorImpl &); bool eliminateInstructions(Function &); void replaceInstruction(Instruction *, Value *); @@ -733,6 +737,13 @@ return MA; } +// Return true if the MemoryAccess is really equivalent to everything. This is +// equivalent to the lattice value "TOP" in most lattices. This is the initial +// state of all memory accesses. +bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const { + return MemoryAccessToClass.lookup(MA) == InitialClass; +} + LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, LoadInst *LI, MemoryAccess *DA, const BasicBlock *B) { @@ -786,13 +797,17 @@ // are simple and avoid value numbering them. auto *SI = cast(I); MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI); - // See if we are defined by a previous store expression, it already has a - // value, and it's the same value as our current store. FIXME: Right now, we - // only do this for simple stores, we should expand to cover memcpys, etc. + // Get the expression, if any, for the RHS of the MemoryDef. + MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( + cast(StoreAccess)->getDefiningAccess()); + // If we are defined by ourselves, use the live on entry def. + if (StoreRHS == StoreAccess) + StoreRHS = MSSA->getLiveOnEntryDef(); + if (SI->isSimple()) { - // Get the expression, if any, for the RHS of the MemoryDef. - MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( - cast(StoreAccess)->getDefiningAccess()); + // See if we are defined by a previous store expression, it already has a + // value, and it's the same value as our current store. FIXME: Right now, we + // only do this for simple stores, we should expand to cover memcpys, etc. const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); CongruenceClass *CC = ExpressionToClass.lookup(OldStore); // Basically, check if the congruence class the store is in is defined by a @@ -803,8 +818,18 @@ if (CC && CC->RepStoredValue == lookupOperandLeader(SI->getValueOperand(), SI, B)) return createStoreExpression(SI, StoreRHS, B); + // Also check if our value operand is defined by a load of the same memory + // location, and the memory state is the same as it was then + // (otherwise, it could have been overwritten later. See test32 in + // transforms/DeadStoreElimination/simple.ll) + if (LoadInst *LI = dyn_cast(SI->getValueOperand())) { + if ((lookupOperandLeader(LI->getPointerOperand(), LI, LI->getParent()) == + lookupOperandLeader(SI->getPointerOperand(), SI, B)) && + (lookupMemoryAccessEquiv( + MSSA->getMemoryAccess(LI)->getDefiningAccess()) == StoreRHS)) + return createVariableExpression(LI); + } } - return createStoreExpression(SI, StoreAccess, B); } @@ -1531,14 +1556,20 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { // If all the arguments are the same, the MemoryPhi has the same value as the // argument. - // Filter out unreachable blocks from our operands. + // Filter out unreachable blocks and self phis from our operands. auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) { - return ReachableBlocks.count(MP->getIncomingBlock(U)); + return lookupMemoryAccessEquiv(cast(U)) != MP && + !isMemoryAccessTop(cast(U)) && + ReachableBlocks.count(MP->getIncomingBlock(U)); }); - - assert(Filtered.begin() != Filtered.end() && - "We should not be processing a MemoryPhi in a completely " - "unreachable block"); + // If all that is left is nothing, our memoryphi is undef. We keep it as + // InitialClass. Note: The only case this should happen is if we have at + // least one self-argument. + if (Filtered.begin() == Filtered.end()) { + if (setMemoryAccessEquivTo(MP, InitialClass)) + markMemoryUsersTouched(MP); + return; + } // Transform the remaining operands into operand leaders. // FIXME: mapped_iterator should have a range version. @@ -1951,8 +1982,10 @@ } }; +// This function converts the set of members for a congruence class from values, +// to sets of defs and uses with associated DFS info. void NewGVN::convertDenseToDFSOrdered( - CongruenceClass::MemberSet &Dense, + const CongruenceClass::MemberSet &Dense, SmallVectorImpl &DFSOrderedSet) { for (auto D : Dense) { // First add the value. @@ -1961,7 +1994,6 @@ // we should only be left with instructions as members. assert(BB && "Should have figured out a basic block for value"); ValueDFS VD; - DomTreeNode *DomNode = DT->getNode(BB); VD.DFSIn = DomNode->getDFSNumIn(); VD.DFSOut = DomNode->getDFSNumOut(); @@ -1974,7 +2006,6 @@ VD.Val = D; } - // If it's an instruction, use the real local dfs number. if (auto *I = dyn_cast(D)) VD.LocalNum = InstrDFS.lookup(I); else @@ -2013,6 +2044,32 @@ } } +// This function converts the set of members for a congruence class from values, +// to the set of defs for loads and stores, with associated DFS info. +void NewGVN::convertDenseToLoadsAndStores( + const CongruenceClass::MemberSet &Dense, + SmallVectorImpl &LoadsAndStores) { + for (auto D : Dense) { + if (!isa(D) && !isa(D)) + continue; + + BasicBlock *BB = getBlockForValue(D); + ValueDFS VD; + DomTreeNode *DomNode = DT->getNode(BB); + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.Val = D; + + // If it's an instruction, use the real local dfs number. + if (auto *I = dyn_cast(D)) + VD.LocalNum = InstrDFS.lookup(I); + else + llvm_unreachable("Should have been an instruction"); + + LoadsAndStores.emplace_back(VD); + } +} + static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. @@ -2167,6 +2224,10 @@ } for (CongruenceClass *CC : CongruenceClasses) { + // Track the equivalent store info so we can decide whether to try + // dead store elimination. + SmallVector PossibleDeadStores; + // FIXME: We should eventually be able to replace everything still // in the initial class with undef, as they should be unreachable. // Right now, initial still contains some things we skip value @@ -2183,15 +2244,12 @@ if (alwaysAvailable(Leader)) { SmallPtrSet MembersLeft; for (auto M : CC->Members) { - Value *Member = M; - // Void things have no uses we can replace. if (Member == CC->RepLeader || Member->getType()->isVoidTy()) { MembersLeft.insert(Member); continue; } - DEBUG(dbgs() << "Found replacement " << *(Leader) << " for " << *Member << "\n"); // Due to equality propagation, these may not always be @@ -2208,7 +2266,6 @@ } } CC->Members.swap(MembersLeft); - } else { DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n"); // If this is a singleton, we can skip it. @@ -2226,20 +2283,15 @@ // Sort the whole thing. std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); - for (auto &VD : DFSOrderedSet) { int MemberDFSIn = VD.DFSIn; int MemberDFSOut = VD.DFSOut; Value *Member = VD.Val; Use *MemberUse = VD.U; - if (Member) { - // We ignore void things because we can't get a value from them. - // FIXME: We could actually use this to kill dead stores that are - // dominated by equivalent earlier stores. - if (Member->getType()->isVoidTy()) - continue; - } + // We ignore void things because we can't get a value from them. + if (Member && Member->getType()->isVoidTy()) + continue; if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n"); @@ -2326,6 +2378,39 @@ MembersLeft.insert(Member); } CC->Members.swap(MembersLeft); + + // If we have possible dead stores to look at, try to eliminate them. + if (CC->StoreCount > 0) { + convertDenseToLoadsAndStores(CC->Members, PossibleDeadStores); + std::sort(PossibleDeadStores.begin(), PossibleDeadStores.end()); + ValueDFSStack EliminationStack; + for (auto &VD : PossibleDeadStores) { + int MemberDFSIn = VD.DFSIn; + int MemberDFSOut = VD.DFSOut; + Instruction *Member = cast(VD.Val); + if (EliminationStack.empty() || + !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut)) { + // Sync to our current scope. + EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut); + if (EliminationStack.empty()) { + EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut); + continue; + } + } + // We already did load elimination, so nothing to do here. + if (isa(Member)) + continue; + assert(!EliminationStack.empty()); + Instruction *Leader = cast(EliminationStack.back()); + assert(DT->dominates(Leader->getParent(), Member->getParent())); + // Member is dominater by Leader, and thus dead + DEBUG(dbgs() << "Marking dead store " << *Member + << " that is dominated by " << *Leader << "\n"); + markInstructionForDeletion(Member); + CC->Members.erase(Member); + ++NumGVNDeadStores; + } + } } return AnythingReplaced; Index: llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll +++ llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll @@ -169,7 +169,6 @@ ; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[I_0]], 30 ; CHECK-NEXT: br i1 [[TMP10]], label [[BB11:%.*]], label [[BB14:%.*]] ; CHECK: bb11: -; CHECK-NEXT: store i32 0, i32* [[TMP9]], align 4 ; CHECK-NEXT: br label [[BB14]] ; CHECK: bb14: ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1 @@ -228,6 +227,53 @@ ret i32 %p.0 } +;; This is an irreducible test case that will cause a memoryphi node loop +;; in the two block. +;; it's equivalent to something like +;; *a = 0 +;; if (<....>) goto loopmiddle +;; loopstart: +;; loopmiddle: +;; load *a +;; *a = 0 +;; if (<....>) goto loopstart otherwise goto loopend +;; loopend: +;; load *a +;; add the results of the loads +;; return them +;; +;; Both loads should equal 0, but it requires being +;; completely optimistic about MemoryPhis, otherwise +;; we will not be able to see through the cycle. +define i8 @quux(i8* noalias %arg, i8* noalias %arg2) { +; CHECK-LABEL: @quux( +; CHECK-NEXT: bb: +; CHECK-NEXT: store i8 0, i8* [[ARG:%.*]] +; CHECK-NEXT: br i1 undef, label [[BB2:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: ret i8 0 +; +bb: + store i8 0, i8 *%arg + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb2, %bb + br label %bb2 + +bb2: ; preds = %bb1, %bb + %tmp2 = load i8, i8* %arg + store i8 0, i8 *%arg + br i1 undef, label %bb1, label %bb3 + +bb3: ; preds = %bb2 + %tmp = load i8, i8* %arg + %tmp3 = add i8 %tmp, %tmp2 + ret i8 %tmp3 +} attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.ident = !{!0, !0, !0} Index: llvm/trunk/test/Transforms/NewGVN/deadstore.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/deadstore.ll +++ llvm/trunk/test/Transforms/NewGVN/deadstore.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +;; Most of these are borrowed from transforms/DSE/simple.ll +;; NewGVN should be able to eliminate any stores of the same value that are actually redundnat. + +;; tmp5 is store of the same value to the same location as the load. +define void @test12({ i32, i32 }* %x) nounwind { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X]], i32 0, i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = sub i32 0, [[TMP8]] +; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP7]], align 4 +; CHECK-NEXT: ret void +; + %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0 + %tmp5 = load i32, i32* %tmp4, align 4 + %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp17 = sub i32 0, %tmp8 + store i32 %tmp5, i32* %tmp4, align 4 + store i32 %tmp17, i32* %tmp7, align 4 + ret void +} +; Remove redundant store if loaded value is in another block. +define i32 @test26(i1 %c, i32* %p) { +; CHECK-LABEL: @test26( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + %v = load i32, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + store i32 %v, i32* %p, align 4 + br label %bb3 +bb3: + ret i32 0 +} + +declare void @unknown_func() +; Remove redundant store, which is in the same loop as the load. +define i32 @test33(i1 %c, i32* %p, i32 %i) { +; CHECK-LABEL: @test33( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + br label %bb1 +bb1: + %v = load i32, i32* %p, align 4 + br label %bb2 +bb2: + store i32 %v, i32* %p, align 4 + ; Might read and overwrite value at %p, but doesn't matter. + call void @unknown_func() + br i1 undef, label %bb1, label %bb3 +bb3: + ret i32 0 +}