Index: lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- lib/Transforms/Scalar/NewGVN.cpp +++ lib/Transforms/Scalar/NewGVN.cpp @@ -87,6 +87,7 @@ "Number of avoided sorted leader changes"); STATISTIC(NumGVNNotMostDominatingLeader, "Number of times a member dominated it's new classes' leader"); +STATISTIC(NumGVNDeadStores, "Number of dead stores eliminated"); //===----------------------------------------------------------------------===// // GVN Pass @@ -347,8 +348,10 @@ // Elimination. struct ValueDFS; - void convertDenseToDFSOrdered(CongruenceClass::MemberSet &, + void convertDenseToDFSOrdered(const CongruenceClass::MemberSet &, SmallVectorImpl &); + void convertDenseToLoadsAndStores(const CongruenceClass::MemberSet &, + SmallVectorImpl &); bool eliminateInstructions(Function &); void replaceInstruction(Instruction *, Value *); @@ -757,13 +760,17 @@ // are simple and avoid value numbering them. auto *SI = cast(I); MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI); - // See if we are defined by a previous store expression, it already has a - // value, and it's the same value as our current store. FIXME: Right now, we - // only do this for simple stores, we should expand to cover memcpys, etc. + // Get the expression, if any, for the RHS of the MemoryDef. + MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( + cast(StoreAccess)->getDefiningAccess()); + // If we are defined by ourselves, use the live on entry def. + if (StoreRHS == StoreAccess) + StoreRHS = MSSA->getLiveOnEntryDef(); + if (SI->isSimple()) { - // Get the expression, if any, for the RHS of the MemoryDef. - MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( - cast(StoreAccess)->getDefiningAccess()); + // See if we are defined by a previous store expression, it already has a + // value, and it's the same value as our current store. FIXME: Right now, we + // only do this for simple stores, we should expand to cover memcpys, etc. const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); CongruenceClass *CC = ExpressionToClass.lookup(OldStore); // Basically, check if the congruence class the store is in is defined by a @@ -774,8 +781,18 @@ if (CC && CC->DefiningExpr && isa(CC->DefiningExpr) && CC->RepStoredValue == lookupOperandLeader(SI->getValueOperand(), SI, B)) return createStoreExpression(SI, StoreRHS, B); + // Also check if our value operand is defined by a load of the same memory + // location, and the memory state is the same as it was then + // (otherwise, it could have been overwritten later. See test32 in + // transforms/DeadStoreElimination/simple.ll) + if (LoadInst *LI = dyn_cast(SI->getValueOperand())) { + if ((lookupOperandLeader(LI->getPointerOperand(), LI, LI->getParent()) == + lookupOperandLeader(SI->getPointerOperand(), SI, B)) && + (lookupMemoryAccessEquiv( + MSSA->getMemoryAccess(LI)->getDefiningAccess()) == StoreRHS)) + return createVariableExpression(LI); + } } - return createStoreExpression(SI, StoreAccess, B); } @@ -1471,14 +1488,19 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { // If all the arguments are the same, the MemoryPhi has the same value as the // argument. - // Filter out unreachable blocks from our operands. + // Filter out unreachable blocks and self phis from our operands. auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) { - return ReachableBlocks.count(MP->getIncomingBlock(U)); + return lookupMemoryAccessEquiv(cast(U)) != MP && + ReachableBlocks.count(MP->getIncomingBlock(U)); }); - - assert(Filtered.begin() != Filtered.end() && - "We should not be processing a MemoryPhi in a completely " - "unreachable block"); + // If all that is left is nothing, our memoryphi is undef. + // Note: The only case this should happen is if we have at least one + // self-argument. + if (Filtered.begin() == Filtered.end()) { + if (setMemoryAccessEquivTo(MP, MSSA->getLiveOnEntryDef())) + markMemoryUsersTouched(MP); + return; + } // Transform the remaining operands into operand leaders. // FIXME: mapped_iterator should have a range version. @@ -1893,7 +1915,7 @@ }; void NewGVN::convertDenseToDFSOrdered( - CongruenceClass::MemberSet &Dense, + const CongruenceClass::MemberSet &Dense, SmallVectorImpl &DFSOrderedSet) { for (auto D : Dense) { // First add the value. @@ -1902,7 +1924,6 @@ // we should only be left with instructions as members. assert(BB && "Should have figured out a basic block for value"); ValueDFS VD; - DomTreeNode *DomNode = DT->getNode(BB); VD.DFSIn = DomNode->getDFSNumIn(); VD.DFSOut = DomNode->getDFSNumOut(); @@ -1948,6 +1969,30 @@ } } +void NewGVN::convertDenseToLoadsAndStores( + const CongruenceClass::MemberSet &Dense, + SmallVectorImpl &LoadsAndStores) { + for (auto D : Dense) { + if (!isa(D) && !isa(D)) + continue; + + BasicBlock *BB = getBlockForValue(D); + ValueDFS VD; + DomTreeNode *DomNode = DT->getNode(BB); + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.Val = D; + + // If it's an instruction, use the real local dfs number. + if (auto *I = dyn_cast(D)) + VD.LocalNum = InstrDFS.lookup(I); + else + llvm_unreachable("Should have been an instruction"); + + LoadsAndStores.emplace_back(VD); + } +} + static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. @@ -2106,7 +2151,11 @@ } } - for (CongruenceClass *CC : CongruenceClasses) { + for (auto *CC : CongruenceClasses) { + // Track the equivalent store info so we can decide whether to try + // dead store elimination. + SmallVector PossibleDeadStores; + // FIXME: We should eventually be able to replace everything still // in the initial class with undef, as they should be unreachable. // Right now, initial still contains some things we skip value @@ -2123,7 +2172,6 @@ if (alwaysAvailable(Leader)) { SmallPtrSet MembersLeft; for (auto M : CC->Members) { - Value *Member = M; // Void things have no uses we can replace. @@ -2148,7 +2196,6 @@ } } CC->Members.swap(MembersLeft); - } else { DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n"); // If this is a singleton, we can skip it. @@ -2166,19 +2213,15 @@ // Sort the whole thing. std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); - for (auto &VD : DFSOrderedSet) { int MemberDFSIn = VD.DFSIn; int MemberDFSOut = VD.DFSOut; Value *Member = VD.Val; Use *MemberUse = VD.U; - if (Member) { - // We ignore void things because we can't get a value from them. - // FIXME: We could actually use this to kill dead stores that are - // dominated by equivalent earlier stores. - if (Member->getType()->isVoidTy()) - continue; + // We ignore void things because we can't get a value from them. + if (Member && Member->getType()->isVoidTy()) { + continue; } if (EliminationStack.empty()) { @@ -2223,6 +2266,11 @@ if (EliminationStack.empty()) continue; + // Push any loads onto the dead store stack, so we can possibly + // eliminate them later. + if (Member && isa(Member)) + PossibleDeadStores.push_back(VD); + // Skip the Value's, we only want to eliminate on their uses. if (Member) continue; @@ -2266,6 +2314,39 @@ MembersLeft.insert(Member); } CC->Members.swap(MembersLeft); + + // If we have possible dead stores to look at, try to eliminate them. + if (CC->StoreCount > 0) { + convertDenseToLoadsAndStores(CC->Members, PossibleDeadStores); + std::sort(PossibleDeadStores.begin(), PossibleDeadStores.end()); + ValueDFSStack EliminationStack; + for (auto &VD : PossibleDeadStores) { + int MemberDFSIn = VD.DFSIn; + int MemberDFSOut = VD.DFSOut; + Instruction *Member = cast(VD.Val); + if (EliminationStack.empty() || + !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut)) { + // Sync to our current scope. + EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut); + if (EliminationStack.empty()) { + EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut); + continue; + } + } + // We already did load elimination, so nothing to do here. + if (isa(Member)) + continue; + assert(!EliminationStack.empty()); + Instruction *Leader = cast(EliminationStack.back()); + assert(DT->dominates(Leader->getParent(), Member->getParent())); + // Member is dominater by Leader, and thus dead + DEBUG(dbgs() << "Marking dead store " << *Member + << " that is dominated by " << *Leader << "\n"); + markInstructionForDeletion(Member); + CC->Members.erase(Member); + ++NumGVNDeadStores; + } + } } return AnythingReplaced; Index: test/Transforms/NewGVN/deadstore.ll =================================================================== --- /dev/null +++ test/Transforms/NewGVN/deadstore.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +;; Most of these are borrowed from transforms/DSE/simple.ll +;; NewGVN should be able to eliminate any stores of the same value that are actually redundnat. + +;; tmp5 is store of the same value to the same location as the load. +define void @test12({ i32, i32 }* %x) nounwind { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X]], i32 0, i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = sub i32 0, [[TMP8]] +; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP7]], align 4 +; CHECK-NEXT: ret void +; + %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0 + %tmp5 = load i32, i32* %tmp4, align 4 + %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp17 = sub i32 0, %tmp8 + store i32 %tmp5, i32* %tmp4, align 4 + store i32 %tmp17, i32* %tmp7, align 4 + ret void +} +; Remove redundant store if loaded value is in another block. +define i32 @test26(i1 %c, i32* %p) { +; CHECK-LABEL: @test26( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + %v = load i32, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + store i32 %v, i32* %p, align 4 + br label %bb3 +bb3: + ret i32 0 +} + +declare void @unknown_func() +; Remove redundant store, which is in the same loop as the load. +define i32 @test33(i1 %c, i32* %p, i32 %i) { +; CHECK-LABEL: @test33( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + br label %bb1 +bb1: + %v = load i32, i32* %p, align 4 + br label %bb2 +bb2: + store i32 %v, i32* %p, align 4 + ; Might read and overwrite value at %p, but doesn't matter. + call void @unknown_func() + br i1 undef, label %bb1, label %bb3 +bb3: + ret i32 0 +}