diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -526,6 +526,7 @@ case Intrinsic::annotation: case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1436,6 +1436,7 @@ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: return 0; case Intrinsic::masked_store: { Type *Ty = Tys[0]; diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -165,19 +165,24 @@ } /// Returns a pointer to the first instruction in this block that is not a - /// PHINode or a debug intrinsic. - const Instruction* getFirstNonPHIOrDbg() const; - Instruction* getFirstNonPHIOrDbg() { + /// PHINode or a debug intrinsic, or any pseudo operation if \c SkipPseudoOp + /// is true. + const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) const; + Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) { return const_cast( - static_cast(this)->getFirstNonPHIOrDbg()); + static_cast(this)->getFirstNonPHIOrDbg( + SkipPseudoOp)); } /// Returns a pointer to the first instruction in this block that is not a - /// PHINode, a debug intrinsic, or a lifetime intrinsic. - const Instruction* getFirstNonPHIOrDbgOrLifetime() const; - Instruction* getFirstNonPHIOrDbgOrLifetime() { + /// PHINode, a debug intrinsic, or a lifetime intrinsic, or any pseudo + /// operation if \c SkipPseudoOp is true. 
+ const Instruction * + getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) const; + Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) { return const_cast( - static_cast(this)->getFirstNonPHIOrDbgOrLifetime()); + static_cast(this)->getFirstNonPHIOrDbgOrLifetime( + SkipPseudoOp)); } /// Returns an iterator to the first instruction in this block that is @@ -191,16 +196,18 @@ } /// Return a const iterator range over the instructions in the block, skipping - /// any debug instructions. + /// any debug instructions. Skip any pseudo operations as well if \c + /// SkipPseudoOp is true. iterator_range>> - instructionsWithoutDebug() const; + instructionsWithoutDebug(bool SkipPseudoOp = false) const; /// Return an iterator range over the instructions in the block, skipping any - /// debug instructions. - iterator_range>> - instructionsWithoutDebug(); + /// debug instructions. Skip any pseudo operations as well if \c + /// SkipPseudoOp is true. + iterator_range< + filter_iterator>> + instructionsWithoutDebug(bool SkipPseudoOp = false); /// Return the size of the basic block ignoring debug instructions filter_iterator( - static_cast(this)->getNextNonDebugInstruction()); + static_cast(this)->getNextNonDebugInstruction( + SkipPseudoOp)); } /// Return a pointer to the previous non-debug instruction in the same basic - /// block as 'this', or nullptr if no such instruction exists. - const Instruction *getPrevNonDebugInstruction() const; - Instruction *getPrevNonDebugInstruction() { + /// block as 'this', or nullptr if no such instruction exists. Skip any pseudo + /// operations if \c SkipPseudoOp is true. 
+ const Instruction * + getPrevNonDebugInstruction(bool SkipPseudoOp = false) const; + Instruction *getPrevNonDebugInstruction(bool SkipPseudoOp = false) { return const_cast( - static_cast(this)->getPrevNonDebugInstruction()); + static_cast(this)->getPrevNonDebugInstruction( + SkipPseudoOp)); } /// Create a copy of 'this' instruction that is identical in all ways except diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -967,6 +967,28 @@ } }; +class PseudoProbeInst : public IntrinsicInst { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::pseudoprobe; + } + + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + ConstantInt *getFuncGuid() const { + return cast(const_cast(getArgOperand(0))); + } + + ConstantInt *getAttributes() const { + return cast(const_cast(getArgOperand(2))); + } + + ConstantInt *getIndex() const { + return cast(const_cast(getArgOperand(1))); + } +}; } // end namespace llvm #endif // LLVM_IR_INTRINSICINST_H diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1277,6 +1277,13 @@ // which specify that infinite loops must be preserved. def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; +// The pseudoprobe intrinsic works as a placeholder for the block it probes. +// Like the sideeffect intrinsic defined above, this intrinsic is treated by the +// optimizer as having opaque side effects so that it won't be removed or moved +// out of the block it probes. 
+def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrWillReturn]>; + // Intrinsics to support half precision floating point format let IntrProperties = [IntrNoMem, IntrWillReturn] in { def int_convert_to_fp16 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -439,6 +439,7 @@ // FIXME: Add lifetime/invariant intrinsics (See: PR30807). case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: return; } } diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -1911,6 +1911,10 @@ if (isa(I)) continue; + // Skip pseudo-probes. + if (isa(I)) + continue; + // Skip ephemeral values. if (EphValues.count(&*I)) continue; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -527,6 +527,7 @@ // FIXME: This list is repeated from NoTTI::getIntrinsicCost. 
case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -125,7 +125,7 @@ if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || - ID == Intrinsic::sideeffect) + ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe) return ID; return Intrinsic::not_intrinsic; } diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -537,6 +537,9 @@ // Debug info intrinsics do not get in the way of tail call optimization. if (isa(BBI)) continue; + // Pseudo probe intrinsics do not block tail call optimization either. + if (isa(BBI)) + continue; // A lifetime end or assume intrinsic should not stop tail call // optimization. if (const IntrinsicInst *II = dyn_cast(BBI)) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2241,13 +2241,12 @@ // Skip over debug and the bitcast. 
do { ++BI; - } while (isa(BI) || &*BI == BCI || &*BI == EVI); + } while (isa(BI) || &*BI == BCI || &*BI == EVI || + isa(BI)); if (&*BI != RetI) return false; } else { - BasicBlock::iterator BI = BB->begin(); - while (isa(BI)) ++BI; - if (&*BI != RetI) + if (BB->getFirstNonPHIOrDbg(true) != RetI) return false; } @@ -2272,18 +2271,12 @@ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { if (!VisitedBBs.insert(*PI).second) continue; - - BasicBlock::InstListType &InstList = (*PI)->getInstList(); - BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); - BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); - do { ++RI; } while (RI != RE && isa(&*RI)); - if (RI == RE) - continue; - - CallInst *CI = dyn_cast(&*RI); - if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && - attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCallBBs.push_back(*PI); + if (Instruction *I = (*PI)->rbegin()->getPrevNonDebugInstruction(true)) { + CallInst *CI = dyn_cast(I); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && + attributesPermitTailCall(F, CI, RetI, *TLI)) + TailCallBBs.push_back(*PI); + } } } diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -97,18 +97,20 @@ iterator_range>> -BasicBlock::instructionsWithoutDebug() const { - std::function Fn = [](const Instruction &I) { - return !isa(I); +BasicBlock::instructionsWithoutDebug(bool SkipPseudoOp) const { + std::function Fn = [=](const Instruction &I) { + return !isa(I) && + !(SkipPseudoOp && isa(I)); }; return make_filter_range(*this, Fn); } -iterator_range>> -BasicBlock::instructionsWithoutDebug() { - std::function Fn = [](Instruction &I) { - return !isa(I); +iterator_range< + filter_iterator>> +BasicBlock::instructionsWithoutDebug(bool SkipPseudoOp) { + std::function Fn = [=](Instruction &I) { + return !isa(I) && + !(SkipPseudoOp && isa(I)); }; return 
make_filter_range(*this, Fn); } @@ -218,14 +220,21 @@ return nullptr; } -const Instruction* BasicBlock::getFirstNonPHIOrDbg() const { - for (const Instruction &I : *this) - if (!isa(I) && !isa(I)) - return &I; +const Instruction *BasicBlock::getFirstNonPHIOrDbg(bool SkipPseudoOp) const { + for (const Instruction &I : *this) { + if (isa(I) || isa(I)) + continue; + + if (SkipPseudoOp && isa(I)) + continue; + + return &I; + } return nullptr; } -const Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() const { +const Instruction * +BasicBlock::getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp) const { for (const Instruction &I : *this) { if (isa(I) || isa(I)) continue; @@ -233,6 +242,9 @@ if (I.isLifetimeStartOrEnd()) continue; + if (SkipPseudoOp && isa(I)) + continue; + return &I; } return nullptr; diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -641,16 +641,18 @@ return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end; } -const Instruction *Instruction::getNextNonDebugInstruction() const { +const Instruction * +Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const { for (const Instruction *I = getNextNode(); I; I = I->getNextNode()) - if (!isa(I)) + if (!isa(I) && !(SkipPseudoOp && isa(I))) return I; return nullptr; } -const Instruction *Instruction::getPrevNonDebugInstruction() const { +const Instruction * +Instruction::getPrevNonDebugInstruction(bool SkipPseudoOp) const { for (const Instruction *I = getPrevNode(); I; I = I->getPrevNode()) - if (!isa(I)) + if (!isa(I) && !(SkipPseudoOp && isa(I))) return I; return nullptr; } diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -543,6 +543,10 @@ // Debugger intrinsics don't incur code size. 
if (isa(I)) continue; + // Pseudo-probes don't incur code size. + if (isa(I)) + continue; + // If this is a pointer->pointer bitcast, it is free. if (isa(I) && I->getType()->isPointerTy()) continue; diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -240,7 +240,11 @@ Escaped = ESCAPED; CallInst *CI = dyn_cast(&I); - if (!CI || CI->isTailCall() || isa(&I)) + // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is + // considered accessing memory and will be marked as a tail call if we + // don't bail out here. + if (!CI || CI->isTailCall() || isa(&I) || + isa(&I)) continue; bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles(); @@ -752,7 +756,7 @@ return false; BasicBlock *Succ = BI->getSuccessor(0); - ReturnInst *Ret = dyn_cast(Succ->getFirstNonPHIOrDbg()); + ReturnInst *Ret = dyn_cast(Succ->getFirstNonPHIOrDbg(true)); if (!Ret) return false; diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -551,6 +551,10 @@ LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) { + LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n"); + ++CurInst; + continue; } LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1981,7 +1981,9 @@ // Look for a store to the same pointer in BrBB. 
unsigned MaxNumInstToLookAt = 9; - for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) { + // Skip pseudo probe intrinsic calls which are not really killing any memory + // accesses. + for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) { if (!MaxNumInstToLookAt) break; --MaxNumInstToLookAt; @@ -2141,6 +2143,14 @@ continue; } + // Skip pseudo probes. The consequence is we lose track of the branch + // probability for ThenBB, which is fine since the optimization here takes + // place regardless of the branch probability. + if (isa(I)) { + SpeculatedDbgIntrinsics.push_back(I); + continue; + } + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; @@ -2495,7 +2505,8 @@ } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I) && + !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. @@ -2508,7 +2519,8 @@ } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I) && + !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -666,6 +666,10 @@ cast(&I)->getIntrinsicID() == Intrinsic::sideeffect) { // Ignore llvm.sideeffect calls. 
+ } else if (isa(&I) && + cast(&I)->getIntrinsicID() == + Intrinsic::pseudoprobe) { + // Ignore llvm.pseudoprobe calls. } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) { LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n'); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7407,7 +7407,8 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || - ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect)) + ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect || + ID == Intrinsic::pseudoprobe)) return nullptr; auto willWiden = [&](ElementCount VF) -> bool { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5261,7 +5261,9 @@ if (I->mayReadOrWriteMemory() && (!isa(I) || - cast(I)->getIntrinsicID() != Intrinsic::sideeffect)) { + (cast(I)->getIntrinsicID() != Intrinsic::sideeffect && + cast(I)->getIntrinsicID() != + Intrinsic::pseudoprobe))) { // Update the linked list of memory accessing instructions. if (CurrentLoadStore) { CurrentLoadStore->NextLoadStore = SD;