diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1467,6 +1467,8 @@ DenseMap IOLs; + Function *ExitUseFn; + DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI) : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} @@ -1475,6 +1477,13 @@ DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI) { DSEState State(F, AA, MSSA, DT, PDT, TLI); + Module *M = F.getParent(); + LLVMContext &Ctx = M->getContext(); + FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Ctx), {}, false); + State.ExitUseFn = + cast(M->getOrInsertFunction("____foobar", FnTy).getCallee()); + State.ExitUseFn->addFnAttr(Attribute::ReadOnly); + // Collect blocks with throwing instructions not modeled in MemorySSA and // alloc-like objects. unsigned PO = 0; @@ -1497,6 +1506,15 @@ (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true))) State.InvisibleToCaller.insert(&I); } + if (isa(BB->getTerminator()) && DT.isReachableFromEntry(BB)) { + MemorySSAUpdater MSSAU(&MSSA); + CallInst *CI = + CallInst::Create(State.ExitUseFn->getFunctionType(), + State.ExitUseFn, {}, BB->getTerminator()); + MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB( + CI, nullptr, CI->getParent(), MemorySSA::End); + MSSAU.insertUse(cast(NewMemAcc), false); + } } // Treat byval or inalloca arguments the same as Allocas, stores to them are @@ -1533,7 +1551,7 @@ } /// Returns true if \p Use completely overwrites \p DefLoc. - bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const { + bool isMustWriteClobber(MemoryLocation DefLoc, Instruction *UseInst) const { // UseInst has a MemoryDef associated in MemorySSA. It's possible for a // MemoryDef to not write to memory, e.g. a volatile load is modeled as a // MemoryDef. @@ -1545,10 +1563,6 @@ return false; ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); - // If necessary, perform additional analysis. - if (isModSet(MR)) - MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); - Optional UseLoc = getLocForWriteEx(UseInst); return isModSet(MR) && isMustSet(MR) && UseLoc->Size.getValue() >= DefLoc.Size.getValue(); @@ -1559,10 +1573,19 @@ if (!UseInst->mayReadFromMemory()) return false; - if (auto CS = CallSite(UseInst)) + if (auto CS = CallSite(UseInst)) { if (CS.onlyAccessesInaccessibleMemory()) return false; + if (CS.getCalledFunction() == ExitUseFn) { + DataLayout DL = F.getParent()->getDataLayout(); + const Value *UO = GetUnderlyingObject(DefLoc.Ptr, DL); + /// Maybe pre-compute + return !UO || InvisibleToCaller.find(UO) == InvisibleToCaller.end() || + (!isa(UO) && PointerMayBeCaptured(UO, true, false)); + } + } + ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); // If necessary, perform additional analysis. if (isRefSet(MR)) @@ -1606,12 +1629,9 @@ if (MSSA.isLiveOnEntryDef(DomAccess)) return None; - // Check if we can skip DomDef for DSE. We also require the KillingDef - // execute whenever DomDef executes and use post-dominance to ensure that. - + // Check if we can skip DomDef for DSE. MemoryDef *DomDef = dyn_cast(DomAccess); - if ((DomDef && canSkipDef(DomDef, DefVisibleToCaller)) || - !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock())) { + if ((DomDef && canSkipDef(DomDef, DefVisibleToCaller))) { StepAgain = true; Current = DomDef->getDefiningAccess(); } @@ -1675,8 +1695,19 @@ // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, // stores [0,1] if (MemoryDef *UseDef = dyn_cast(UseAccess)) { - if (!isCompleteOverwrite(DefLoc, UseInst)) + int64_t InstWriteOffset, DepWriteOffset; + auto CC = getLocForWriteEx(UseInst); + InstOverlapIntervalsTy IOL; + + const DataLayout &DL = F.getParent()->getDataLayout(); + + if (!isMustWriteClobber(DefLoc, UseInst) || + (CC && + isOverwrite(DefLoc, *CC, DL, TLI, DepWriteOffset, InstWriteOffset, + UseInst, IOL, AA, &F) != OW_Complete)) { + LLVM_DEBUG(dbgs() << " ... found non-aliasing MemoryDef\n"); PushMemUses(UseDef); + } } } @@ -1877,6 +1908,17 @@ for (auto &KV : State.IOLs) MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second); + MemorySSAUpdater MSSAU(&MSSA); + for (auto &BB : F) { + if (!DT.isReachableFromEntry(&BB)) + continue; + if (isa(BB.getTerminator())) { + Instruction *C = &*std::prev(BB.getTerminator()->getIterator()); + MSSAU.removeMemoryAccess(C); + C->eraseFromParent(); + } + } + return MadeChange; } } // end anonymous namespace diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll @@ -21,10 +21,9 @@ ; } -define dso_local i32 @_Z1av() !dbg !7 { +define dso_local i32 @_Z1av([5 x i64]* %b) !dbg !7 { entry: %retval = alloca i32, align 4 - %b = alloca [5 x i64], align 16 call void @llvm.dbg.declare(metadata [5 x i64]* %b, metadata !11, metadata !DIExpression()), !dbg !16 %0 = bitcast [5 x i64]* %b to i8*, !dbg !16 call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 40, i1 false), !dbg !16 @@ -37,7 +36,7 @@ store i64 2, i64* %4, align 16, !dbg !16 %5 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 3, !dbg !16 store i64 2, i64* %5, align 8, !dbg !16 - %call = call i32 @_Z1av(), !dbg !17 + %call = call i32 @_Z1av([5 x i64]* %b), !dbg !17 %tobool = icmp ne i32 %call, 0, !dbg !17 br i1 %tobool, label %if.then, label %if.end, !dbg !19 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -1,8 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; XFAIL: * -; TODO: Handling of free not implemented yet. - ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +declare void @use(i32 *) + +define void @test4(i32* noalias %P, i1 %c1) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i32 3, i32* [[P]] +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb5: +; CHECK-NEXT: call void @use(i32* [[P]]) +; CHECK-NEXT: ret void +; + store i32 1, i32* %P + br i1 %c1, label %bb1, label %bb2 + +bb1: + store i32 0, i32* %P + br label %bb5 +bb2: + store i32 3, i32* %P + br label %bb5 + +bb5: + call void @use(i32* %P) + ret void +} + +define void @test5(i32* noalias %P) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB4:%.*]] +; CHECK: bb3: +; CHECK-NEXT: store i32 3, i32* [[P]] +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb4: +; CHECK-NEXT: store i32 5, i32* [[P]] +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb5: +; CHECK-NEXT: call void @use(i32* [[P]]) +; CHECK-NEXT: ret void +; + store i32 1, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + store i32 0, i32* %P + br label %bb5 + +bb2: + br i1 undef, label %bb3, label %bb4 + +bb3: + store i32 3, i32* %P + br label %bb5 + +bb4: + store i32 5, i32* %P + br label %bb5 + +bb5: + call void @use(i32* %P) + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-overlap.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -dse -enable-dse-memoryssa %s -S | FileCheck %s + + +%struct.ham = type { [3 x double], [3 x double]} + +declare void @may_throw() +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + +define void @overlap1(%struct.ham* %arg, i1 %cond) { +; CHECK-LABEL: @overlap1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_HAM:%.*]], %struct.ham* [[ARG:%.*]], i64 0, i32 0, i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 0, i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 0, i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i64 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i32 0 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB7:%.*]], label [[BB8:%.*]] +; CHECK: bb7: +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: store double 1.000000e+00, double* [[TMP2]], align 8 +; CHECK-NEXT: store double 2.000000e+00, double* [[TMP1]], align 8 +; CHECK-NEXT: store double 3.000000e+00, double* [[TMP]], align 8 +; CHECK-NEXT: store double 4.000000e+00, double* [[TMP5]], align 8 +; CHECK-NEXT: store double 5.000000e+00, double* [[TMP4]], align 8 +; CHECK-NEXT: store double 6.000000e+00, double* [[TMP3]], align 8 +; CHECK-NEXT: ret void +; +bb: + %tmp = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 2 + %tmp1 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 1 + %tmp2 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 0 + %tmp3 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0,i32 1, i64 2 + %tmp4 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 1, i64 1 + %tmp5 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 1, i32 0 + %tmp6 = bitcast double* %tmp2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %tmp6, i8 0, i64 48, i1 false) + br i1 %cond, label %bb7, label %bb8 + +bb7: ; preds = %bb + br label %bb9 + +bb8: ; preds = %bb + br label %bb9 + +bb9: ; preds = %bb8, %bb7 + store double 1.0, double* %tmp2, align 8 + store double 2.0, double* %tmp1, align 8 + store double 3.0, double* %tmp, align 8 + store double 4.0, double* %tmp5, align 8 + store double 5.0, double* %tmp4, align 8 + store double 6.0, double* %tmp3, align 8 + ret void +} + +define void @overlap2(%struct.ham* %arg, i1 %cond) { +; CHECK-LABEL: @overlap2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_HAM:%.*]], %struct.ham* [[ARG:%.*]], i64 0, i32 0, i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 0, i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 0, i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i64 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_HAM]], %struct.ham* [[ARG]], i64 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[TMP2]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) [[TMP6]], i8 0, i64 48, i1 false) +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB7:%.*]], label [[BB8:%.*]] +; CHECK: bb7: +; CHECK-NEXT: call void @may_throw() +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: store double 1.000000e+00, double* [[TMP2]], align 8 +; CHECK-NEXT: store double 2.000000e+00, double* [[TMP1]], align 8 +; CHECK-NEXT: store double 3.000000e+00, double* [[TMP]], align 8 +; CHECK-NEXT: store double 4.000000e+00, double* [[TMP5]], align 8 +; CHECK-NEXT: store double 5.000000e+00, double* [[TMP4]], align 8 +; CHECK-NEXT: store double 6.000000e+00, double* [[TMP3]], align 8 +; CHECK-NEXT: ret void +; +bb: + %tmp = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 2 + %tmp1 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 1 + %tmp2 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 0, i64 0 + %tmp3 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0,i32 1, i64 2 + %tmp4 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 1, i64 1 + %tmp5 = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 1, i32 0 + %tmp6 = bitcast double* %tmp2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %tmp6, i8 0, i64 48, i1 false) + br i1 %cond, label %bb7, label %bb8 + +bb7: ; preds = %bb + call void @may_throw() + br label %bb9 + +bb8: ; preds = %bb + br label %bb9 + +bb9: ; preds = %bb8, %bb7 + store double 1.0, double* %tmp2, align 8 + store double 2.0, double* %tmp1, align 8 + store double 3.0, double* %tmp, align 8 + store double 4.0, double* %tmp5, align 8 + store double 5.0, double* %tmp4, align 8 + store double 6.0, double* %tmp3, align 8 + ret void +} + + diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -175,7 +175,6 @@ define void @test11() { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[P:%.*]] = alloca i32 -; CHECK-NEXT: store i32 0, i32* [[P]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: store i32 0, i32* [[P]] @@ -200,10 +199,9 @@ define void @test12(i32* %P) { ; CHECK-LABEL: @test12( -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P]] +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: store i32 1, i32* [[P]] @@ -226,10 +224,9 @@ define void @test13(i32* %P) { ; CHECK-LABEL: @test13( -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P]] +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: store i32 1, i32* [[P]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -114,7 +114,7 @@ ; CHECK-NEXT: [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4 ; CHECK-NEXT: [[TMP:%.*]] = load i8, i8* [[P_4]], align 1 ; CHECK-NEXT: store i8 0, i8* [[P_4]], align 1 -; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) #6 +; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) #7 ; CHECK-NEXT: store i8 [[TMP]], i8* [[P_4]], align 1 ; CHECK-NEXT: ret i8* [[Q]] ;