diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -167,8 +167,8 @@ /// Returns a pointer to the first instruction in this block that is not a /// PHINode or a debug intrinsic, or any pseudo operation if \c SkipPseudoOp /// is true. - const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) const; - Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) { + const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = true) const; + Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = true) { return const_cast( static_cast(this)->getFirstNonPHIOrDbg( SkipPseudoOp)); @@ -178,8 +178,8 @@ /// PHINode, a debug intrinsic, or a lifetime intrinsic, or any pseudo /// operation if \c SkipPseudoOp is true. const Instruction * - getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) const; - Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) { + getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = true) const; + Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = true) { return const_cast( static_cast(this)->getFirstNonPHIOrDbgOrLifetime( SkipPseudoOp)); @@ -200,14 +200,14 @@ /// SkipPseudoOp is true. iterator_range>> - instructionsWithoutDebug(bool SkipPseudoOp = false) const; + instructionsWithoutDebug(bool SkipPseudoOp = true) const; /// Return an iterator range over the instructions in the block, skipping any /// debug instructions. Skip and any pseudo operations as well if \c /// SkipPseudoOp is true. iterator_range< filter_iterator>> - instructionsWithoutDebug(bool SkipPseudoOp = false); + instructionsWithoutDebug(bool SkipPseudoOp = true); /// Return the size of the basic block ignoring debug instructions filter_iterator(I)) - continue; - - // Skip pseudo-probes. - if (isa(I)) + // Similarly, skip pseudo-probes. + if (I.isDebugOrPseudoInst()) continue; // Skip ephemeral values. diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -309,6 +309,7 @@ case Intrinsic::invariant_end: case Intrinsic::assume: case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::pseudoprobe: return {false, AliasResult(AliasResult::NoAlias)}; case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: @@ -1782,6 +1783,7 @@ break; case Intrinsic::assume: case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::pseudoprobe: return nullptr; } } diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -524,10 +524,8 @@ if (&*BBI == &Call) break; // Debug info intrinsics do not get in the way of tail call optimization. - if (isa(BBI)) - continue; // Pseudo probe intrinsics do not block tail call optimization either. - if (isa(BBI)) + if (BBI->isDebugOrPseudoInst()) continue; // A lifetime end, assume or noalias.decl intrinsic should not stop tail // call optimization. diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp --- a/llvm/lib/IR/User.cpp +++ b/llvm/lib/IR/User.cpp @@ -107,7 +107,7 @@ } bool User::isDroppable() const { - return isa(this); + return isa(this) || isa(this); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp --- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -88,7 +88,7 @@ static bool isEmptyFunction(Function *F) { BasicBlock &Entry = F->getEntryBlock(); for (auto &I : Entry) { - if (isa(I)) + if (I.isDebugOrPseudoInst()) continue; if (auto *RI = dyn_cast(&I)) return !RI->getReturnValue(); diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2580,7 +2580,7 @@ return false; for (auto &I : Fn.getEntryBlock()) { - if (isa(I)) + if (I.isDebugOrPseudoInst()) continue; if (isa(I)) return true; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -682,7 +682,7 @@ BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend()); for (; BI != BE; ++BI) { if (auto *I = dyn_cast(&*BI)) { - if (isa(I) || + if (I->isDebugOrPseudoInst() || I->getIntrinsicID() == EndI.getIntrinsicID()) continue; if (IsStart(*I)) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1487,8 +1487,8 @@ StoreInst *OtherStore = nullptr; if (OtherBr->isUnconditional()) { --BBI; - // Skip over debugging info. - while (isa(BBI) || + // Skip over debugging info and pseudo probes. + while (BBI->isDebugOrPseudoInst() || (isa(BBI) && BBI->getType()->isPointerTy())) { if (BBI==OtherBB->begin()) return false; diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2925,7 +2925,7 @@ auto GetLastSinkableStore = [](BasicBlock::iterator BBI) { auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) { - return isa(BBI) || + return BBI->isDebugOrPseudoInst() || (isa(BBI) && BBI->getType()->isPointerTy()); }; diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -1265,6 +1265,12 @@ continue; } + // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics. + if (match(&Inst, m_Intrinsic())) { + LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n'); + continue; + } + // We can skip all invariant.start intrinsics since they only read memory, // and we can forward values across it. For invariant starts without // invariant ends, we can use the fact that the invariantness never ends to diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1275,9 +1275,9 @@ // Skip debug info intrinsics. do { --I; - } while (isa(I) && I != Preheader->begin()); + } while (I->isDebugOrPseudoInst() && I != Preheader->begin()); - if (isa(I) && I == Preheader->begin()) + if (I->isDebugOrPseudoInst() && I == Preheader->begin()) Done = true; } else { Done = true; diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -62,7 +62,7 @@ NewBB->getInstList().push_back(NewInst); VMap[&I] = NewInst; // Add instruction map to value. - hasCalls |= (isa(I) && !isa(I)); + hasCalls |= (isa(I) && !I.isDebugOrPseudoInst()); if (const AllocaInst *AI = dyn_cast(&I)) { if (!AI->isStaticAlloca()) { hasDynamicAllocas = true; @@ -410,7 +410,7 @@ NewInst->setName(II->getName() + NameSuffix); VMap[&*II] = NewInst; // Add instruction map to value. NewBB->getInstList().push_back(NewInst); - hasCalls |= (isa(II) && !isa(II)); + hasCalls |= (isa(II) && !II->isDebugOrPseudoInst()); if (CodeInfo) { CodeInfo->OrigVMap[&*II] = NewInst; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2587,7 +2587,7 @@ // Walk the loop in reverse so that we can identify ephemeral values properly // (values only feeding assumes). - for (Instruction &I : reverse(BB->instructionsWithoutDebug())) { + for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) { // Can't fold blocks that contain noduplicate or convergent calls. if (CallInst *CI = dyn_cast(&I)) if (CI->cannotDuplicate() || CI->isConvergent()) @@ -2891,8 +2891,7 @@ // instructions. for (BasicBlock *IfBlock : IfBlocks) for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa(I) && - !isa(I)) { + if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. @@ -3416,7 +3415,7 @@ InstructionCost Cost = 0; InstructionCost Budget = PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; - for (auto &I : BB->instructionsWithoutDebug()) { + for (auto &I : BB->instructionsWithoutDebug(false)) { // Consider terminator instruction to be free. if (I.isTerminator()) continue; @@ -3739,7 +3738,7 @@ // fold the conditions into logical ops and one cond br. // Ignore dbg intrinsics. - if (&*BB->instructionsWithoutDebug().begin() != BI) + if (&*BB->instructionsWithoutDebug(false).begin() != BI) return false; int PBIOp, BIOp; @@ -5182,7 +5181,7 @@ // which we can constant-propagate the CaseVal, continue to its successor. SmallDenseMap ConstantPool; ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); - for (Instruction &I :CaseDest->instructionsWithoutDebug()) { + for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) { if (I.isTerminator()) { // If the terminator is a simple branch, continue to the next block. if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator()) @@ -6197,7 +6196,7 @@ // If the block only contains the switch, see if we can fold the block // away into any preds. - if (SI == &*BB->instructionsWithoutDebug().begin()) + if (SI == &*BB->instructionsWithoutDebug(false).begin()) if (FoldValueComparisonIntoPredecessors(SI, Builder)) return requestResimplify(); } diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1085,7 +1085,7 @@ continue; // Use early increment range so that we can erase instructions in loop. for (Instruction &I : make_early_inc_range(BB)) { - if (isa(I)) + if (I.isDebugOrPseudoInst()) continue; FoldInst(I); } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-cse.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-cse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-cse.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -S -early-cse-memssa | FileCheck %s + +define i16 @f1() readonly { + ret i16 0 +} + +declare void @f2() + +; Check that EarlyCSE correctly handles pseudo probes that don't have +; a MemoryAccess. + +define void @f3() { +; CHECK-LABEL: @f3( +; CHECK-NEXT: [[CALL1:%.*]] = call i16 @f1() +; CHECK-NEXT: call void @llvm.pseudoprobe +; CHECK-NEXT: ret void +; + %call1 = call i16 @f1() + call void @llvm.pseudoprobe(i64 6878943695821059507, i64 9, i32 0, i64 -1) + %call2 = call i16 @f1() + ret void +} + + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn } \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-loop-deletion.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-loop-deletion.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-loop-deletion.ll @@ -0,0 +1,35 @@ +; RUN: opt %s -passes=loop-deletion -S | FileCheck %s --check-prefixes=CHECK + +%class.Loc.95 = type { %class.Domain.96 } +%class.Domain.96 = type { %class.DomainBase.97 } +%class.DomainBase.97 = type { [3 x %struct.WrapNoInit] } +%struct.WrapNoInit = type { %class.Loc } +%class.Loc = type { %class.Domain.67 } +%class.Domain.67 = type { %class.DomainBase.68 } +%class.DomainBase.68 = type { i32 } + +define dso_local void @foo(%class.Loc.95* %0) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: br label [[foo:%.*]] +; CHECK: foo.exit: +; CHECK-NEXT: ret void +; + br label %2 + +2: ; preds = %4, %1 + %.0.i.i = phi %class.Loc.95* [ undef, %1 ], [ %5, %4 ] + %3 = icmp ne %class.Loc.95* %.0.i.i, %0 + br i1 %3, label %4, label %foo.exit + +4: ; preds = %2 + call void @llvm.pseudoprobe(i64 6878943695821059507, i64 9, i32 0, i64 -1) + %5 = getelementptr inbounds %class.Loc.95, %class.Loc.95* %.0.i.i, i32 1 + br label %2 + +foo.exit: ; preds = %2 + ret void +} + +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1 + +attributes #1 = { willreturn readnone norecurse nofree }