diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -522,7 +522,7 @@ // We must ignore debug info directives when counting (otherwise they // would affect codegen). Instruction *Inst = &*--ScanFrom; - if (isa(Inst)) + if (Inst->isDebugOrPseudoInst()) continue; // Restore ScanFrom to expected value in case next test succeeds @@ -610,7 +610,7 @@ SmallVector MustNotAliasInsts; for (Instruction &Inst : make_range(++Load->getReverseIterator(), ScanBB->rend())) { - if (isa(&Inst)) + if (Inst.isDebugOrPseudoInst()) continue; if (MaxInstsToScan-- == 0) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1393,7 +1393,7 @@ --BBI; // Don't count debug info directives, lest they affect codegen, // and we skip pointer-to-pointer bitcasts, which are NOPs. - if (isa(BBI) || + if (BBI->isDebugOrPseudoInst() || (isa(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -399,6 +399,13 @@ } } + // Calls that only access inaccessible memory do not block merging + // accessible stores. + if (auto *CB = dyn_cast(BI)) { + if (CB->onlyAccessesInaccessibleMemory()) + continue; + } + if (!isa(BI) && !isa(BI)) { // If the instruction is readnone, ignore it, otherwise bail out. 
We // don't even allow readonly here because we don't want something like: diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp --- a/llvm/lib/Transforms/Scalar/Sink.cpp +++ b/llvm/lib/Transforms/Scalar/Sink.cpp @@ -202,7 +202,7 @@ if (!ProcessedBegin) --I; - if (isa(Inst)) + if (Inst->isDebugOrPseudoInst()) continue; if (SinkInstruction(Inst, Stores, DT, LI, AA)) { diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=instcombine -S < %s | FileCheck %s +; RUN: opt -passes=instcombine -available-load-scan-limit=2 -S < %s | FileCheck %s %struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 } %struct.CompAtomExt = type { i32 } @@ -13,11 +13,13 @@ %class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }> %class.Pairlists = type { i16*, i32, i32 } +@Y = constant [2 x { i32, float }] [ { i32, float } { i32 12, float 1.000000e+00 }, { i32, float } { i32 37, float 0x3FF3B2FEC0000000 } ] ; <[2 x { i32, float }]*> [#uses=2] + +define dso_local void @merge(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 { ;; Check the minPart4 and minPart assignments are merged. 
+; CHECK-LABEL: @merge( ; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 ; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 - -define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 { entry: %savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11 %0 = load i32, i32* %savePairlists3, align 8 @@ -58,7 +60,36 @@ ret void } -declare dso_local void @_ZN9Pairlists8addIndexEv() align 2 +define i32 @load(i32* nocapture %a, i32* nocapture %b) { +;; Check the last load is deleted (its value is forwarded from the store across the pseudo probe). +; CHECK-LABEL: @load( +; CHECK-NEXT: %1 = getelementptr inbounds i32, i32* %a, i64 1 +; CHECK-NEXT: %2 = load i32, i32* %1, align 8 +; CHECK-NEXT: %3 = getelementptr inbounds i32, i32* %b, i64 1 +; CHECK-NEXT: store i32 %2, i32* %3, align 8 +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1) +; CHECK-NEXT: ret i32 %[[#]] + %1 = getelementptr inbounds i32, i32* %a, i32 1 + %2 = load i32, i32* %1, align 8 + %3 = getelementptr inbounds i32, i32* %b, i32 1 + store i32 %2, i32* %3, align 8 + %4 = getelementptr inbounds i32, i32* %b, i32 1 + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1) + %5 = load i32, i32* %4, align 8 + ret i32 %5 +} + +define void @dse(i32* %p) { +;; Check the first store is deleted. 
+; CHECK-LABEL: @dse( +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1) +; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret void + store i32 0, i32* %p + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1) + store i32 0, i32* %p + ret void +} ; Function Attrs: inaccessiblememonly nounwind willreturn declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -memcpyopt -S | FileCheck %s + +%struct.MV = type { i16, i16 } + +define void @test(i32* nocapture %c) nounwind optsize { +; All the stores in this example should be merged into a single memset. +; CHECK-NOT: store i32 -1 +; CHECK: call void @llvm.memset.p0i8.i64 + store i32 -1, i32* %c, align 4 + %1 = getelementptr inbounds i32, i32* %c, i32 1 + store i32 -1, i32* %1, align 4 + %2 = getelementptr inbounds i32, i32* %c, i32 2 + store i32 -1, i32* %2, align 4 + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1) + %3 = getelementptr inbounds i32, i32* %c, i32 3 + store i32 -1, i32* %3, align 4 + %4 = getelementptr inbounds i32, i32* %c, i32 4 + store i32 -1, i32* %4, align 4 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn }