diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1922,22 +1922,38 @@ } return false; } -}; -/// \returns true if \p KillingDef stores the result of \p Load to the source of -/// \p Load. -static bool storeIsNoop(MemorySSA &MSSA, LoadInst *Load, - MemoryDef *KillingDef) { - Instruction *Store = KillingDef->getMemoryInst(); - // If the load's operand isn't the destination of the store, bail. - if (Load->getPointerOperand() != Store->getOperand(1)) - return false; + /// \returns true if \p Def is a no-op store, either because it + /// directly stores back a loaded value or stores zero to a calloced object. + bool storeIsNoop(MemoryDef *Def, MemoryLocation DefLoc, const Value *DefUO) { + StoreInst *Store = dyn_cast(Def->getMemoryInst()); + if (!Store) + return false; - // Get the defining access for the load. - auto *LoadAccess = MSSA.getMemoryAccess(Load)->getDefiningAccess(); - // The store is dead if the defining accesses are the same. - return LoadAccess == KillingDef->getDefiningAccess(); -} + if (auto *LoadI = dyn_cast(Store->getOperand(0))) { + if (LoadI->getPointerOperand() == Store->getOperand(1)) { + auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess(); + // If both accesses share the same defining access, no instructions + // between them can modify the memory location. + return LoadAccess == Def->getDefiningAccess(); + } + } + + Constant *StoredConstant = dyn_cast(Store->getOperand(0)); + if (StoredConstant && StoredConstant->isNullValue()) { + auto *DefUOInst = dyn_cast(DefUO); + if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) { + auto *UnderlyingDef = cast(MSSA.getMemoryAccess(DefUOInst)); + // If UnderlyingDef is the clobbering access of Def, no instructions + // between them can modify the memory location. + auto *ClobberDef = + MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def); + return UnderlyingDef == ClobberDef; + } + } + return false; + } +}; bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, @@ -1954,18 +1970,6 @@ continue; Instruction *SI = KillingDef->getMemoryInst(); - // Check if we're storing a value that we just loaded. - if (auto *Load = dyn_cast(SI->getOperand(0))) { - if (storeIsNoop(MSSA, Load, KillingDef)) { - LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *SI - << '\n'); - State.deleteDeadInstruction(SI); - NumNoopStores++; - MadeChange = true; - continue; - } - } - auto MaybeSILoc = State.getLocForWriteEx(SI); if (!MaybeSILoc) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " @@ -1975,6 +1979,16 @@ MemoryLocation SILoc = *MaybeSILoc; assert(SILoc.Ptr && "SILoc should not be null"); const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL); + + // Check if the store is a no-op. + if (State.storeIsNoop(KillingDef, SILoc, SILocUnd)) { + LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n'); + State.deleteDeadInstruction(SI); + NumNoopStores++; + MadeChange = true; + continue; + } + Instruction *DefObj = const_cast(dyn_cast(SILocUnd)); bool DefVisibleToCallerBeforeRet = diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll @@ -1,5 +1,3 @@ -; XFAIL: * - ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s declare noalias i8* @calloc(i64, i64) @@ -65,3 +63,98 @@ ; CHECK: store i8 %arg, i8* %1, align 4 ret i8* %1 } + +define i8* @test8() { +; CHECK-LABEL: test8 +; CHECK-NOT: store + %p = tail call noalias i8* @calloc(i64 1, i64 4) + store i8 0, i8* %p, align 1 + %p.1 = getelementptr i8, i8* %p, i32 1 + store i8 0, i8* %p.1, align 1 + %p.3 = getelementptr i8, i8* %p, i32 3 + store i8 0, i8* %p.3, align 1 + %p.2 = getelementptr i8, i8* %p, i32 2 + store i8 0, i8* %p.2, align 1 + ret i8* %p +} + +define i8* @test9() { +; CHECK-LABEL: test9 +; CHECK-NEXT: %p = tail call noalias i8* @calloc(i64 1, i64 4) +; CHECK-NEXT: store i8 5, i8* %p, align 1 +; CHECK-NEXT: ret i8* %p + + %p = tail call noalias i8* @calloc(i64 1, i64 4) + store i8 5, i8* %p, align 1 + %p.1 = getelementptr i8, i8* %p, i32 1 + store i8 0, i8* %p.1, align 1 + %p.3 = getelementptr i8, i8* %p, i32 3 + store i8 0, i8* %p.3, align 1 + %p.2 = getelementptr i8, i8* %p, i32 2 + store i8 0, i8* %p.2, align 1 + ret i8* %p +} + +define i8* @test10() { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[P:%.*]] = tail call noalias i8* @calloc(i64 1, i64 4) +; CHECK-NEXT: [[P_3:%.*]] = getelementptr i8, i8* [[P]], i32 3 +; CHECK-NEXT: store i8 5, i8* [[P_3]], align 1 +; CHECK-NEXT: ret i8* [[P]] +; + + %p = tail call noalias i8* @calloc(i64 1, i64 4) + store i8 0, i8* %p, align 1 + %p.1 = getelementptr i8, i8* %p, i32 1 + store i8 0, i8* %p.1, align 1 + %p.3 = getelementptr i8, i8* %p, i32 3 + store i8 5, i8* %p.3, align 1 + %p.2 = getelementptr i8, i8* %p, i32 2 + store i8 0, i8* %p.2, align 1 + ret i8* %p +} + +; TODO: we could also eliminate the last store i8 0, i8* %p.3.2, but currently +; don't because those are eliminated before eliminating killed stores. +define i8* @test11() { +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[P:%.*]] = tail call noalias i8* @calloc(i64 1, i64 4) +; CHECK-NEXT: [[P_3_2:%.*]] = getelementptr i8, i8* [[P]], i32 3 +; CHECK-NEXT: store i8 0, i8* [[P_3_2]], align 1 +; CHECK-NEXT: ret i8* [[P]] +; + + %p = tail call noalias i8* @calloc(i64 1, i64 4) + store i8 0, i8* %p, align 1 + %p.1 = getelementptr i8, i8* %p, i32 1 + store i8 0, i8* %p.1, align 1 + %p.3 = getelementptr i8, i8* %p, i32 3 + store i8 5, i8* %p.3, align 1 + %p.2 = getelementptr i8, i8* %p, i32 2 + store i8 0, i8* %p.2, align 1 + %p.3.2 = getelementptr i8, i8* %p, i32 3 + store i8 0, i8* %p.3.2, align 1 + ret i8* %p +} + +define i8* @test12() { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[P:%.*]] = tail call noalias i8* @calloc(i64 1, i64 4) +; CHECK-NEXT: [[P_3:%.*]] = getelementptr i8, i8* [[P]], i32 3 +; CHECK-NEXT: store i8 5, i8* [[P_3]], align 1 +; CHECK-NEXT: call void @use(i8* [[P]]) +; CHECK-NEXT: [[P_3_2:%.*]] = getelementptr i8, i8* [[P]], i32 3 +; CHECK-NEXT: store i8 0, i8* [[P_3_2]], align 1 +; CHECK-NEXT: ret i8* [[P]] +; + + %p = tail call noalias i8* @calloc(i64 1, i64 4) + %p.3 = getelementptr i8, i8* %p, i32 3 + store i8 5, i8* %p.3, align 1 + call void @use(i8* %p) + %p.3.2 = getelementptr i8, i8* %p, i32 3 + store i8 0, i8* %p.3.2, align 1 + ret i8* %p +} + +declare void @use(i8*) readonly diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll @@ -55,7 +55,6 @@ declare noalias i8* @malloc(i32) -declare noalias i8* @calloc(i32, i32) define void @test14(i32* %Q) { ; CHECK-LABEL: @test14( @@ -77,15 +76,6 @@ ret void } -define void @test21() { -; CHECK-LABEL: @test21( -; CHECK-NEXT: ret void -; - %m = call i8* @calloc(i32 9, i32 7) - store i8 0, i8* %m - ret void -} - define void @test22(i1 %i, i32 %k, i32 %m) nounwind { ; CHECK-LABEL: @test22( ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -213,6 +213,15 @@ } +define void @test21() { +; CHECK-LABEL: @test21( +; CHECK-NEXT: ret void +; + %m = call i8* @calloc(i32 9, i32 7) + store i8 0, i8* %m + ret void +} + ; PR13547 declare noalias i8* @strdup(i8* nocapture) nounwind define noalias i8* @test23() nounwind uwtable ssp {