diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -521,6 +521,7 @@ case Intrinsic::annotation: case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1416,6 +1416,7 @@ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: return 0; case Intrinsic::masked_store: { Type *Ty = Tys[0]; diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -180,6 +180,32 @@ static_cast(this)->getFirstNonPHIOrDbgOrLifetime()); } + /// Returns a pointer to the first instruction in this block that is not a + /// PHINode, a debug intrinsic, or a pseudo probe intrinsic. + const Instruction *getFirstNonPHIOrDbgOrPseudoProbe() const; + Instruction *getFirstNonPHIOrDbgOrPseudoProbe() { + return const_cast(static_cast(this) + ->getFirstNonPHIOrDbgOrPseudoProbe()); + } + + /// Returns a pointer to the first instruction in this block that is not a + /// PHINode, a debug intrinsic, a lifetime intrinsic, or a pseudo probe + /// intrinsic. + const Instruction *getFirstNonPHIOrDbgOrLifetimeOrPseudoProbe() const; + Instruction *getFirstNonPHIOrDbgOrLifetimeOrPseudoProbe() { + return const_cast( + static_cast(this) + ->getFirstNonPHIOrDbgOrLifetimeOrPseudoProbe()); + } + + /// Returns a pointer to the last instruction in this block that is not a + /// debug intrinsic, or a pseudo probe intrinsic. 
+ const Instruction *getLastNonDbgOrPseudoProbe() const; + Instruction *getLastNonDbgOrPseudoProbe() { + return const_cast( + static_cast(this)->getLastNonDbgOrPseudoProbe()); + } + /// Returns an iterator to the first instruction in this block that is /// suitable for inserting a non-PHI instruction. /// diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -967,6 +967,23 @@ } }; +class PseudoProbeInst : public IntrinsicInst { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::pseudoprobe; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + ConstantInt *getFuncGuid() const { + return cast(const_cast(getArgOperand(0))); + } + + ConstantInt *getIndex() const { + return cast(const_cast(getArgOperand(1))); + } +}; } // end namespace llvm #endif // LLVM_IR_INTRINSICINST_H diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1260,6 +1260,12 @@ // which specify that infinite loops must be preserved. def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; +// The pseudoprobe intrinsic works as a placeholder for the block it probes. +// Like the sideeffect intrinsic defined above, this intrinsic is treated by the +// optimizer as having opaque side effects so that it won't be removed or moved +// out of the block it probes. 
+def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; + // Intrinsics to support half precision floating point format let IntrProperties = [IntrNoMem, IntrWillReturn] in { def int_convert_to_fp16 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -439,6 +439,7 @@ // FIXME: Add lifetime/invariant intrinsics (See: PR30807). case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: return; } } diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -1911,6 +1911,10 @@ if (isa(I)) continue; + // Skip pseudo-probes. + if (isa(I)) + continue; + // Skip ephemeral values. if (EphValues.count(&*I)) continue; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -527,6 +527,7 @@ // FIXME: This list is repeated from NoTTI::getIntrinsicCost. 
case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -125,7 +125,7 @@ if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || - ID == Intrinsic::sideeffect) + ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe) return ID; return Intrinsic::not_intrinsic; } diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -537,6 +537,9 @@ // Debug info intrinsics do not get in the way of tail call optimization. if (isa(BBI)) continue; + // Pseudo probe intrinsics do not block tail call optimization either. + if (isa(BBI)) + continue; // A lifetime end or assume intrinsic should not stop tail call // optimization. if (const IntrinsicInst *II = dyn_cast(BBI)) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2241,13 +2241,13 @@ // Skip over debug and the bitcast. 
do { ++BI; - } while (isa(BI) || &*BI == BCI || &*BI == EVI); + } while (isa(BI) || &*BI == BCI || &*BI == EVI || + isa(BI)); if (&*BI != RetI) return false; } else { - BasicBlock::iterator BI = BB->begin(); - while (isa(BI)) ++BI; - if (&*BI != RetI) + Instruction *I = BB->getFirstNonPHIOrDbgOrPseudoProbe(); + if (I != RetI) return false; } @@ -2272,18 +2272,12 @@ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { if (!VisitedBBs.insert(*PI).second) continue; - - BasicBlock::InstListType &InstList = (*PI)->getInstList(); - BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); - BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); - do { ++RI; } while (RI != RE && isa(&*RI)); - if (RI == RE) - continue; - - CallInst *CI = dyn_cast(&*RI); - if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && - attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCallBBs.push_back(*PI); + if (Instruction *RI = (*PI)->getLastNonDbgOrPseudoProbe()) { + CallInst *CI = dyn_cast(RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && + attributesPermitTailCall(F, CI, RetI, *TLI)) + TailCallBBs.push_back(*PI); + } } } diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -238,6 +238,38 @@ return nullptr; } +const Instruction *BasicBlock::getFirstNonPHIOrDbgOrPseudoProbe() const { + for (const Instruction &I : *this) { + if (isa(I) || isa(I) || isa(I)) + continue; + return &I; + } + return nullptr; +} + +const Instruction * +BasicBlock::getFirstNonPHIOrDbgOrLifetimeOrPseudoProbe() const { + for (const Instruction &I : *this) { + if (isa(I) || isa(I) || isa(I)) + continue; + + if (I.isLifetimeStartOrEnd()) + continue; + + return &I; + } + return nullptr; +} + +const Instruction *BasicBlock::getLastNonDbgOrPseudoProbe() const { + for (const Instruction &I : reverse(*this)) { + if (isa(I) || isa(I)) + continue; + return &I; + } + return 
nullptr; +} + BasicBlock::const_iterator BasicBlock::getFirstInsertionPt() const { const Instruction *FirstNonPHI = getFirstNonPHI(); if (!FirstNonPHI) diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -538,6 +538,10 @@ // Debugger intrinsics don't incur code size. if (isa(I)) continue; + // Pseudo-probes don't incur code size. + if (isa(I)) + continue; + // If this is a pointer->pointer bitcast, it is free. if (isa(I) && I->getType()->isPointerTy()) continue; diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -240,7 +240,11 @@ Escaped = ESCAPED; CallInst *CI = dyn_cast(&I); - if (!CI || CI->isTailCall() || isa(&I)) + // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is + // considered accessing memory and will be marked as a tail call if we + // don't bail out here. + if (!CI || CI->isTailCall() || isa(&I) || + isa(&I)) continue; bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles(); @@ -752,7 +756,8 @@ return false; BasicBlock *Succ = BI->getSuccessor(0); - ReturnInst *Ret = dyn_cast(Succ->getFirstNonPHIOrDbg()); + ReturnInst *Ret = + dyn_cast(Succ->getFirstNonPHIOrDbgOrPseudoProbe()); if (!Ret) return false; diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -551,6 +551,10 @@ LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) { + LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n"); + ++CurInst; + continue; } LLVM_DEBUG(dbgs() << "Unknown intrinsic. 
Can not evaluate.\n"); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1987,7 +1987,10 @@ --MaxNumInstToLookAt; // Could be calling an instruction that affects memory like free(). - if (CurI.mayHaveSideEffects() && !isa(CurI)) + // Skip pseudo probe intrinsic calls which are not really killing any memory + // accesses. + if (CurI.mayHaveSideEffects() && !isa(CurI) && + !isa(CurI)) return nullptr; if (auto *SI = dyn_cast(&CurI)) { @@ -2141,6 +2144,14 @@ continue; } + // Skip pseudo probes. The consequence is we lose track of the branch + // probability for ThenBB, which is fine since the optimization here takes + // place regardless of the branch probability. + if (isa(I)) { + SpeculatedDbgIntrinsics.push_back(I); + continue; + } + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; @@ -2495,7 +2506,8 @@ } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I) && + !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. @@ -2508,7 +2520,8 @@ } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I) && + !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. 
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -666,6 +666,10 @@ cast(&I)->getIntrinsicID() == Intrinsic::sideeffect) { // Ignore llvm.sideeffect calls. + } else if (isa(&I) && + cast(&I)->getIntrinsicID() == + Intrinsic::pseudoprobe) { + // Ignore llvm.pseudoprobe calls. } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) { LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n'); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7412,7 +7412,8 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || - ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect)) + ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect || + ID == Intrinsic::pseudoprobe)) return nullptr; auto willWiden = [&](ElementCount VF) -> bool { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5175,7 +5175,9 @@ if (I->mayReadOrWriteMemory() && (!isa(I) || - cast(I)->getIntrinsicID() != Intrinsic::sideeffect)) { + (cast(I)->getIntrinsicID() != Intrinsic::sideeffect && + cast(I)->getIntrinsicID() != + Intrinsic::pseudoprobe))) { // Update the linked list of memory accessing instructions. if (CurrentLoadStore) { CurrentLoadStore->NextLoadStore = SD;