diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -518,6 +518,7 @@ case Intrinsic::annotation: case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1398,6 +1398,7 @@ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: return 0; case Intrinsic::masked_store: { Type *Ty = Tys[0]; diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -180,6 +180,14 @@ static_cast(this)->getFirstNonPHIOrDbgOrLifetime()); } + /// Returns a pointer to the first instruction in this block that is not a + /// PHINode, a debug intrinsic, or a pseudo probe intrinsic. + const Instruction *getFirstNonPHIOrDbgOrPseudoProbe() const; + Instruction *getFirstNonPHIOrDbgOrPseudoProbe() { + return const_cast(static_cast(this) + ->getFirstNonPHIOrDbgOrPseudoProbe()); + } + /// Returns an iterator to the first instruction in this block that is /// suitable for inserting a non-PHI instruction. 
/// diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -937,6 +937,23 @@ } }; +class PseudoProbeInst : public IntrinsicInst { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::pseudoprobe; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + ConstantInt *getFuncGuid() const { + return cast(const_cast(getArgOperand(0))); + } + + ConstantInt *getIndex() const { + return cast(const_cast(getArgOperand(1))); + } +}; } // end namespace llvm #endif // LLVM_IR_INTRINSICINST_H diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1250,6 +1250,12 @@ // which specify that infinite loops must be preserved. def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; +// The pseudoprobe intrinsic works as a placeholder for the block it probes. +// Like the sideeffect intrinsic defined above, this intrinsic is treated by the +// optimizer as having opaque side effects so that it won't be removed or moved +// out of the block it probes. +def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; + // Intrinsics to support half precision floating point format let IntrProperties = [IntrNoMem, IntrWillReturn] in { def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -444,6 +444,7 @@ // FIXME: Add lifetime/invariant intrinsics (See: PR30807). 
case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: return; } } diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -599,6 +599,7 @@ // FIXME: This list is repeated from NoTTI::getIntrinsicCost. case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -125,7 +125,7 @@ if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || - ID == Intrinsic::sideeffect) + ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe) return ID; return Intrinsic::not_intrinsic; } diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -537,6 +537,9 @@ // Debug info intrinsics do not get in the way of tail call optimization. if (isa(BBI)) continue; + // Pseudo probe intrinsics do not block tail call optimization either. + if (isa(BBI)) + continue; // A lifetime end or assume intrinsic should not stop tail call // optimization. if (const IntrinsicInst *II = dyn_cast(BBI)) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2230,12 +2230,14 @@ // Skip over debug and the bitcast. 
do { ++BI; - } while (isa(BI) || &*BI == BCI || &*BI == EVI); + } while (isa(BI) || &*BI == BCI || &*BI == EVI || + isa(BI)); if (&*BI != RetI) return false; } else { BasicBlock::iterator BI = BB->begin(); - while (isa(BI)) ++BI; + while (isa(BI) || isa(BI)) + ++BI; if (&*BI != RetI) return false; } @@ -2265,7 +2267,10 @@ BasicBlock::InstListType &InstList = (*PI)->getInstList(); BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); - do { ++RI; } while (RI != RE && isa(&*RI)); + do { + ++RI; + } while (RI != RE && + (isa(&*RI) || isa(&*RI))); if (RI == RE) continue; diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -99,7 +99,7 @@ std::function>> BasicBlock::instructionsWithoutDebug() const { std::function Fn = [](const Instruction &I) { - return !isa(I); + return !isa(I) && !isa(I); }; return make_filter_range(*this, Fn); } @@ -108,7 +108,7 @@ std::function>> BasicBlock::instructionsWithoutDebug() { std::function Fn = [](Instruction &I) { - return !isa(I); + return !isa(I) && !isa(I); }; return make_filter_range(*this, Fn); } @@ -238,6 +238,15 @@ return nullptr; } +const Instruction *BasicBlock::getFirstNonPHIOrDbgOrPseudoProbe() const { + for (const Instruction &I : *this) { + if (isa(I) || isa(I) || isa(I)) + continue; + return &I; + } + return nullptr; +} + BasicBlock::const_iterator BasicBlock::getFirstInsertionPt() const { const Instruction *FirstNonPHI = getFirstNonPHI(); if (!FirstNonPHI) diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -237,7 +237,10 @@ Escaped = ESCAPED; CallInst *CI = dyn_cast(&I); - if (!CI || CI->isTailCall() || isa(&I)) + // A PseudoProbeInst does access memory and will 
be marked as a tail call + // if we don't bail out here. + if (!CI || CI->isTailCall() || isa(&I) || + isa(&I)) continue; bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles(); @@ -679,7 +682,7 @@ bool Change = false; // Make sure this block is a trivial return block. - assert(BB->getFirstNonPHIOrDbg() == Ret && + assert(BB->getFirstNonPHIOrDbgOrPseudoProbe() == Ret && "Trying to fold non-trivial return block"); // If the return block contains nothing but the return and PHI's, @@ -837,7 +840,7 @@ BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB. if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall); - if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + if (!Change && BB->getFirstNonPHIOrDbgOrPseudoProbe() == Ret) Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall); MadeChange |= Change; } diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -551,6 +551,10 @@ LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) { + LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n"); + ++CurInst; + continue; } LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2134,6 +2134,14 @@ continue; } + // Skip pseudo probes. The consequence is we lose track of the branch + // probability for ThenBB, which is fine since the optimization here takes + // place regardless of the branch probability. 
+ if (isa(I)) { + SpeculatedDbgIntrinsics.push_back(I); + continue; + } + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; @@ -2488,7 +2496,8 @@ } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I) && + !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. @@ -2501,7 +2510,8 @@ } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I) && + !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -666,6 +666,10 @@ cast(&I)->getIntrinsicID() == Intrinsic::sideeffect) { // Ignore llvm.sideeffect calls. + } else if (isa(&I) && + cast(&I)->getIntrinsicID() == + Intrinsic::pseudoprobe) { + // Ignore llvm.pseudoprobe calls. 
} else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) { LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n'); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7359,7 +7359,8 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || - ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect)) + ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect || + ID == Intrinsic::pseudoprobe)) return nullptr; auto willWiden = [&](ElementCount VF) -> bool { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5124,7 +5124,9 @@ if (I->mayReadOrWriteMemory() && (!isa(I) || - cast(I)->getIntrinsicID() != Intrinsic::sideeffect)) { + (cast(I)->getIntrinsicID() != Intrinsic::sideeffect && + cast(I)->getIntrinsicID() != + Intrinsic::pseudoprobe))) { // Update the linked list of memory accessing instructions. if (CurrentLoadStore) { CurrentLoadStore->NextLoadStore = SD;