diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -365,6 +365,9 @@ /// Whether emit extra debug info for sample pgo profile collection. CODEGENOPT(DebugInfoForProfiling, 1, 0) +/// Whether emit pseudo probes for sample pgo profile collection. +CODEGENOPT(PseudoProbeForProfiling, 1, 0) + /// Whether 3-component vector type is preserved. CODEGENOPT(PreserveVec3Type, 1, 0) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -843,6 +843,12 @@ def fprofile_exclude_files_EQ : Joined<["-"], "fprofile-exclude-files=">, Group, Flags<[CC1Option, CoreOption]>, HelpText<"Instrument only functions from files where names don't match all the regexes separated by a semi-colon">; +def fpseudo_probe_for_profiling : Flag<["-"], "fpseudo-probe-for-profiling">, + Group, Flags<[DriverOption, CC1Option]>, + HelpText<"Emit pseudo probes for sample profiler">; +def fno_pseudo_probe_for_profiling : Flag<["-"], "fno-pseudo-probe-for-profiling">, + Group, Flags<[DriverOption, CC1Option]>, + HelpText<"Do not emit pseudo probes for sample profiler.">; def forder_file_instrumentation : Flag<["-"], "forder-file-instrumentation">, Group, Flags<[CC1Option, CoreOption]>, HelpText<"Generate instrumented code to collect order file into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1112,10 +1112,15 @@ CSAction, CodeGenOpts.DebugInfoForProfiling); } else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use + PGOOpt = PGOOptions( + CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile, 
+ PGOOptions::SampleUse, PGOOptions::NoCSAction, + CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling); + else if (CodeGenOpts.PseudoProbeForProfiling) + // -fpseudo-probe-for-profiling PGOOpt = - PGOOptions(CodeGenOpts.SampleProfileFile, "", - CodeGenOpts.ProfileRemappingFile, PGOOptions::SampleUse, - PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); + PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, + CodeGenOpts.DebugInfoForProfiling, true); else if (CodeGenOpts.DebugInfoForProfiling) // -fdebug-info-for-profiling PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5443,6 +5443,10 @@ } Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ); + if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling, + options::OPT_fno_pseudo_probe_for_profiling, false)) + CmdArgs.push_back("-fpseudo-probe-for-profiling"); + RenderBuiltinOptions(TC, RawTriple, Args, CmdArgs); if (!Args.hasFlag(options::OPT_fassume_sane_operator_new, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -872,6 +872,9 @@ std::string(Args.getLastArgValue(OPT_fprofile_sample_use_EQ)); Opts.DebugInfoForProfiling = Args.hasFlag( OPT_fdebug_info_for_profiling, OPT_fno_debug_info_for_profiling, false); + Opts.PseudoProbeForProfiling = + Args.hasFlag(OPT_fpseudo_probe_for_profiling, + OPT_fno_pseudo_probe_for_profiling, false); Opts.DebugNameTable = static_cast( Args.hasArg(OPT_ggnu_pubnames) ? 
llvm::DICompileUnit::DebugNameTableKind::GNU diff --git a/clang/test/CodeGen/emit-pseudo-probe.c b/clang/test/CodeGen/emit-pseudo-probe.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/emit-pseudo-probe.c @@ -0,0 +1,33 @@ +// RUN: %clang -O2 -fexperimental-new-pass-manager -fpseudo-probe-for-profiling -g -emit-llvm -S -o - %s | FileCheck %s --check-prefix=CHECK + +// Check the generation of pseudoprobe intrinsic call + +void bar(); +void go(); + +void foo(int x) { + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1) + if (x == 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2) + bar(); + else + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3) + go(); + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4) +} + +void foo2(void (*f)()) { + // Check pseudo_probe metadata attached to the indirect call instruction. + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1) + // CHECK: {{.*}} call {{.*}} !dbg ![[#PROBE0:]] + f(); +} + +// Check the generation of module-level metadata that contains function info necessary to +// synthesize probe-based sample counts. The format is: +// { FunctionName, FunctionGUID, FunctionHash } +// CHECK : !llvm.pseudo_probe_desc = !{![[#DESC0:]], ![[#DESC1:]]}; +// CHECK : ![[#DESC0]] = !{!"foo", i64[[#GUID]], i64[[#HASH:]]}; +// CHECK : ![[#DESC1]] = !{ !"foo2", i64[[#GUID2]], i64[[#HASH2:]] } +// CHECK : ![[#PROBE0]] = !DILocation(line: [[#]], column: [[#]], scope: ![[#SCOPE:]]) +// CHECK : ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 16777218) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -516,6 +516,7 @@ case Intrinsic::annotation: case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case 
Intrinsic::dbg_value: case Intrinsic::dbg_label: diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1398,6 +1398,7 @@ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: return 0; case Intrinsic::masked_store: { Type *Ty = Tys[0]; diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -114,6 +114,8 @@ bool getEmitCallSiteInfo(); +bool getPseudoProbeForProfiling(); + bool getEnableDebugEntryValues(); bool getForceDwarfFrameSection(); diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1086,6 +1086,10 @@ /// known nonzero constant. The only operand here is the chain. GET_DYNAMIC_AREA_OFFSET, + /// Pseudo probe for AutoFDO, as a place holder in a basic block to improve + /// the sample counts quality. + PSEUDO_PROBE, + /// VSCALE(IMM) - Returns the runtime scaling factor used to calculate the /// number of elements within a scalable vector. IMM is a constant integer /// multiplier that is applied to the runtime value. diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1241,6 +1241,7 @@ case TargetOpcode::DBG_LABEL: case TargetOpcode::LIFETIME_START: case TargetOpcode::LIFETIME_END: + case TargetOpcode::PSEUDO_PROBE: return true; } } diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -471,6 +471,9 @@ /// Create Hardware Loop pass. 
\see HardwareLoops.cpp FunctionPass *createHardwareLoopsPass(); + /// This pass inserts pseudo probe annotation for value profiling. + FunctionPass *createPseudoProbeInserter(); + /// Create IR Type Promotion pass. \see TypePromotion.cpp FunctionPass *createTypePromotionPass(); diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1169,6 +1169,11 @@ SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain, int FrameIndex, int64_t Size, int64_t Offset = -1); + /// Creates a PseudoProbeSDNode with function GUID `Guid` and + /// the index of the block `Index` it is probing. + SDValue getPseudoProbeNode(const SDLoc &dl, SDValue Chain, uint64_t Guid, + uint64_t Index); + /// Create a MERGE_VALUES node from the given operands. SDValue getMergeValues(ArrayRef Ops, const SDLoc &dl); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1784,6 +1784,31 @@ } }; +/// This SDNode is used for PSEUDO_PROBE values, which indicate +/// the function guid and the index of the basic block that is +/// being probed. A pseudo probe serves as a place holder and +/// will be removed at the end of compilation. It does not have +/// any operand because we do not want the instruction selection +/// to deal with any. 
+class PseudoProbeSDNode : public SDNode { + friend class SelectionDAG; + uint64_t Guid; + uint64_t Index; + + PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, + SDVTList VTs, uint64_t Guid, uint64_t Index) + : SDNode(Opcode, Order, dl, VTs), Guid(Guid), Index(Index) {} + +public: + uint64_t getGuid() const { return Guid; } + uint64_t getIndex() const { return Index; } + + // Methods to support isa and dyn_cast + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::PSEUDO_PROBE; + } +}; + class JumpTableSDNode : public SDNode { friend class SelectionDAG; diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -180,6 +180,14 @@ static_cast(this)->getFirstNonPHIOrDbgOrLifetime()); } + /// Returns a pointer to the first instruction in this block that is not a + /// PHINode, a debug intrinsic, or a pseudo probe intrinsic. + const Instruction *getFirstNonPHIOrDbgOrPseudoProbe() const; + Instruction *getFirstNonPHIOrDbgOrPseudoProbe() { + return const_cast(static_cast(this) + ->getFirstNonPHIOrDbgOrPseudoProbe()); + } + /// Returns an iterator to the first instruction in this block that is /// suitable for inserting a non-PHI instruction. 
/// diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -937,6 +937,23 @@ } }; +class PseudoProbeInst : public IntrinsicInst { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::pseudoprobe; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + ConstantInt *getFuncGuid() const { + return cast(const_cast(getArgOperand(0))); + } + + ConstantInt *getIndex() const { + return cast(const_cast(getArgOperand(1))); + } +}; } // end namespace llvm #endif // LLVM_IR_INTRINSICINST_H diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1237,6 +1237,12 @@ // which specify that infinite loops must be preserved. def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; +// The pseudoprobe intrinsic works as a place holder to the block it probes. +// Like the sideeffect intrinsic defined above, this intrinsic is treated by the +// optimizer as having opaque side effects so that it won't be removed or moved +// out of the block it probes. 
+def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; + // Intrinsics to support half precision floating point format let IntrProperties = [IntrNoMem, IntrWillReturn] in { def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -354,6 +354,7 @@ void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&); void initializePromoteLegacyPassPass(PassRegistry&); void initializePruneEHPass(PassRegistry&); +void initializePseudoProbeInserterPass(PassRegistry &); void initializeRABasicPass(PassRegistry&); void initializeRAGreedyPass(PassRegistry&); void initializeReachingDefAnalysisPass(PassRegistry&); diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -36,11 +36,15 @@ enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", std::string ProfileRemappingFile = "", PGOAction Action = NoAction, - CSPGOAction CSAction = NoCSAction, bool SamplePGOSupport = false) + CSPGOAction CSAction = NoCSAction, + bool DebugInfoForProfiling = false, + bool PseudoProbeForProfiling = false) : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), ProfileRemappingFile(ProfileRemappingFile), Action(Action), - CSAction(CSAction), - SamplePGOSupport(SamplePGOSupport || Action == SampleUse) { + CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling || + (Action == SampleUse && + !PseudoProbeForProfiling)), + PseudoProbeForProfiling(PseudoProbeForProfiling) { // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can // callback with IRUse action without ProfileFile. @@ -55,16 +59,22 @@ // a profile. 
assert(this->CSAction != CSIRUse || this->Action == IRUse); - // If neither Action nor CSAction, SamplePGOSupport needs to be true. + // If neither Action nor CSAction, DebugInfoForProfiling or + // PseudoProbeForProfiling needs to be true. assert(this->Action != NoAction || this->CSAction != NoCSAction || - this->SamplePGOSupport); + this->DebugInfoForProfiling || this->PseudoProbeForProfiling); + + // Pseudo probe instrumentation should only work with autoFDO or no FDO. + assert(!this->PseudoProbeForProfiling || this->Action == NoAction || + this->Action == SampleUse); } std::string ProfileFile; std::string CSProfileGenFile; std::string ProfileRemappingFile; PGOAction Action; CSPGOAction CSAction; - bool SamplePGOSupport; + bool DebugInfoForProfiling; + bool PseudoProbeForProfiling; }; /// Tunable parameters for passes in the default pipelines. diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -106,6 +106,9 @@ HANDLE_TARGET_OPCODE(LIFETIME_START) HANDLE_TARGET_OPCODE(LIFETIME_END) +/// Pseudo probe +HANDLE_TARGET_OPCODE(PSEUDO_PROBE) + /// A Stackmap instruction captures the location of live variables at its /// position in the instruction stream. It is followed by a shadow of bytes /// that must lie within the function and not contain another stackmap. 
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1139,6 +1139,13 @@ let AsmString = "LIFETIME_END"; let hasSideEffects = 0; } +def PSEUDO_PROBE : StandardPseudoInstruction { + let OutOperandList = (outs); + let InOperandList = (ins i64imm:$guid, i64imm:$index, i64imm:$type); + let AsmString = "PSEUDO_PROBE"; + let hasSideEffects = 1; +} + def STACKMAP : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i64imm:$id, i32imm:$nbytes, variable_ops); diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -0,0 +1,65 @@ +//===- Transforms/IPO/SampleProfileProbe.h ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file provides the interface for the pseudo probe implementation for +/// AutoFDO. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBER_H +#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Target/TargetMachine.h" +#include + +namespace llvm { + +class Module; + +using BlockIdMap = std::unordered_map; + +enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; + +enum class PseudoProbeType { Block = 0 }; + +/// Sample profile pseudo prober. +/// +/// Insert pseudo probes for block sampling and value sampling. 
+class SampleProfileProber { +public: + // Builds a prober for function F and assigns a probe id to each of its blocks. + SampleProfileProber(Function &F); + void instrumentOneFunc(Function &F, TargetMachine *TM); + +private: + Function *getFunction() const { return F; } + uint32_t getBlockId(const BasicBlock *BB) const; + void computeProbeIdForBlocks(); + + Function *F; + + /// Map basic blocks to their pseudo probe ids. + BlockIdMap BlockProbeIds; + + /// The ID of the last probe. Can be used to number a new probe. + uint32_t LastProbeId; +}; + +class SampleProfileProbePass : public PassInfoMixin { + TargetMachine *TM; + +public: + SampleProfileProbePass(TargetMachine *TM) : TM(TM) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm +#endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBER_H diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -444,6 +444,7 @@ // FIXME: Add lifetime/invariant intrinsics (See: PR30807). case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: return; } } diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -601,6 +601,7 @@ // FIXME: This list is repeated from NoTTI::getIntrinsicCost. 
case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -125,7 +125,7 @@ if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || - ID == Intrinsic::sideeffect) + ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe) return ID; return Intrinsic::not_intrinsic; } diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -537,6 +537,9 @@ // Debug info intrinsics do not get in the way of tail call optimization. if (isa(BBI)) continue; + // Pseudo probe intrinsics do not block tail call optimization either. + if (isa(BBI)) + continue; // A lifetime end or assume intrinsic should not stop tail call // optimization. if (const IntrinsicInst *II = dyn_cast(BBI)) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2225,12 +2225,14 @@ // Skip over debug and the bitcast. 
do { ++BI; - } while (isa(BI) || &*BI == BCI || &*BI == EVI); + } while (isa(BI) || &*BI == BCI || &*BI == EVI || + isa(BI)); if (&*BI != RetI) return false; } else { BasicBlock::iterator BI = BB->begin(); - while (isa(BI)) ++BI; + while (isa(BI) || isa(BI)) + ++BI; if (&*BI != RetI) return false; } @@ -2260,7 +2262,10 @@ BasicBlock::InstListType &InstList = (*PI)->getInstList(); BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); - do { ++RI; } while (RI != RE && isa(&*RI)); + do { + ++RI; + } while (RI != RE && + (isa(&*RI) || isa(&*RI))); if (RI == RE) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -496,6 +496,7 @@ SDValue visitSTORE(SDNode *N); SDValue visitLIFETIME_END(SDNode *N); + SDValue visitPSEUDO_PROBE(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); SDValue visitBUILD_VECTOR(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1082,6 +1082,18 @@ break; } + case ISD::PSEUDO_PROBE: { + unsigned TarOp = TargetOpcode::PSEUDO_PROBE; + auto Guid = cast(Node)->getGuid(); + auto Index = cast(Node)->getIndex(); + + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addImm(Guid) + .addImm(Index) + .addImm(0); // 0 for block probes + break; + } + case ISD::INLINEASM: case ISD::INLINEASM_BR: { unsigned NumOps = Node->getNumOperands(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -536,6 +536,10 @@ 
ID.AddInteger(cast(N)->getOffset()); } break; + case ISD::PSEUDO_PROBE: + ID.AddInteger(cast(N)->getGuid()); + ID.AddInteger(cast(N)->getIndex()); + break; case ISD::JumpTable: case ISD::TargetJumpTable: ID.AddInteger(cast(N)->getIndex()); @@ -6899,6 +6903,30 @@ return V; } +SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &dl, SDValue Chain, + uint64_t Guid, uint64_t Index) { + + const unsigned Opcode = ISD::PSEUDO_PROBE; + const auto VTs = getVTList(MVT::Other); + SDValue Ops[] = {Chain}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + ID.AddInteger(Guid); + ID.AddInteger(Index); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) + return SDValue(E, 0); + + auto *N = newSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTs, Guid, Index); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6663,6 +6663,13 @@ } return; } + case Intrinsic::pseudoprobe: { + auto Guid = cast(I.getArgOperand(0))->getZExtValue(); + auto Index = cast(I.getArgOperand(1))->getZExtValue(); + Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index); + DAG.setRoot(Res); + return; + } case Intrinsic::invariant_start: // Discard region information. 
setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -394,6 +394,8 @@ case ISD::DEBUGTRAP: return "debugtrap"; case ISD::LIFETIME_START: return "lifetime.start"; case ISD::LIFETIME_END: return "lifetime.end"; + case ISD::PSEUDO_PROBE: + return "pseudoprobe"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2786,6 +2786,7 @@ case ISD::ANNOTATION_LABEL: case ISD::LIFETIME_START: case ISD::LIFETIME_END: + case ISD::PSEUDO_PROBE: NodeToMatch->setNodeId(-1); // Mark selected. 
return; case ISD::AssertSext: diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -238,6 +238,15 @@ return nullptr; } +const Instruction *BasicBlock::getFirstNonPHIOrDbgOrPseudoProbe() const { + for (const Instruction &I : *this) { + if (isa(I) || isa(I) || isa(I)) + continue; + return &I; + } + return nullptr; +} + BasicBlock::const_iterator BasicBlock::getFirstInsertionPt() const { const Instruction *FirstNonPHI = getFirstNonPHI(); if (!FirstNonPHI) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -97,6 +97,7 @@ #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" #include "llvm/Transforms/IPO/SampleProfile.h" +#include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/Transforms/IPO/StripDeadPrototypes.h" #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -900,6 +901,12 @@ OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) { ModulePassManager MPM(DebugLogging); + // Place pseudo probe instrumentation as the first pass of the pipeline to + // minimize the impact of optimization changes. + if (PGOOpt && PGOOpt->PseudoProbeForProfiling && + Phase != ThinLTOPhase::PostLink) + MPM.addPass(SampleProfileProbePass(TM)); + bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); // In ThinLTO mode, when flattened profile is used, all the available @@ -1253,7 +1260,7 @@ for (auto &C : PipelineStartEPCallbacks) C(MPM); - if (PGOOpt && PGOOpt->SamplePGOSupport) + if (PGOOpt && PGOOpt->DebugInfoForProfiling) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); // Add the core simplification pipeline. @@ -1277,7 +1284,7 @@ // Force any function attributes we want the rest of the pipeline to observe. 
MPM.addPass(ForceFunctionAttrsPass()); - if (PGOOpt && PGOOpt->SamplePGOSupport) + if (PGOOpt && PGOOpt->DebugInfoForProfiling) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); // Apply module pipeline start EP callback. diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -85,6 +85,7 @@ MODULE_PASS("sample-profile", SampleProfileLoaderPass()) MODULE_PASS("scc-oz-module-inliner", buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging)) +MODULE_PASS("pseudo-probe", SampleProfileProbePass(TM)) MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -31,6 +31,7 @@ PassManagerBuilder.cpp PruneEH.cpp SampleProfile.cpp + SampleProfileProbe.cpp SCCP.cpp StripDeadPrototypes.cpp StripSymbols.cpp diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -0,0 +1,103 @@ +//===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SampleProfileProber transformation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/SampleProfileProbe.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/CRC.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include + +using namespace llvm; +#define DEBUG_TYPE "sample-profile-probe" + +SampleProfileProber::SampleProfileProber(Function &Func) : F(&Func) { + BlockProbeIds.clear(); + LastProbeId = (uint32_t)PseudoProbeReservedId::Last; + computeProbeIdForBlocks(); +} + +void SampleProfileProber::computeProbeIdForBlocks() { + for (auto &BB : *F) { + BlockProbeIds[&BB] = ++LastProbeId; + } +} + +uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const { + auto iter = BlockProbeIds.find(const_cast(BB)); + return iter == BlockProbeIds.end() ? 0 : iter->second; +} + +void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { + Module *M = F.getParent(); + MDBuilder MDB(F.getContext()); + // Compute a GUID without considering the function's linkage type. This is + // fine for now since function name is the only key in the profile database. + // Same-named global function and static functions will have their profiles + // merged into a single one eventually anyway. This is also a workaround for + // GUID computation of callsite probes in LTO mode (see + // PseudoProbeInserter.cpp), where module name is not available and + // considering linkage type will result in an invalid GUID. + uint64_t Guid = Function::getGUID(F.getName()); + + // Probe basic blocks. 
+ for (auto &I : BlockProbeIds) { + auto BB = I.first; + auto Index = I.second; + // Insert a probe before an instruction with a valid debug line number which + // will be assigned to the probe. The line number will be used later to + // model the inline context when the probe is inlined into other functions. + // Debug instructions, phi nodes and lifetime markers do not have a valid + // line number. Real instructions generated by optimizations may not come + // with a line number either. + auto HasValidDbgLine = [](Instruction *J) { + return !isa(J) && !isa(J) && + !J->isLifetimeStartOrEnd() && J->getDebugLoc(); + }; + + auto J = &*BB->getFirstInsertionPt(); + while (J != BB->getTerminator() && !HasValidDbgLine(J)) { + J = J->getNextNode(); + } + + IRBuilder<> Builder(J); + assert(Builder.GetInsertPoint() != BB->end() && + "Cannot get the probing point"); + Function *ProbeFn = + llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe); + Value *args[] = {Builder.getInt64(Guid), Builder.getInt64(Index)}; + Builder.CreateCall(ProbeFn, args); + } +} + +PreservedAnalyses SampleProfileProbePass::run(Module &M, + ModuleAnalysisManager &AM) { + for (auto &F : M) { + if (F.isDeclaration()) + continue; + SampleProfileProber ProbeManager(F); + ProbeManager.instrumentOneFunc(F, TM); + } + + return PreservedAnalyses::none(); +} diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -237,7 +237,10 @@ Escaped = ESCAPED; CallInst *CI = dyn_cast(&I); - if (!CI || CI->isTailCall() || isa(&I)) + // A PseudoProbeInst does access memory and will be marked as a tail call + // if we don't bail out here. 
+ if (!CI || CI->isTailCall() || isa(&I) || + isa(&I)) continue; bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles(); @@ -679,7 +682,7 @@ bool Change = false; // Make sure this block is a trivial return block. - assert(BB->getFirstNonPHIOrDbg() == Ret && + assert(BB->getFirstNonPHIOrDbgOrPseudoProbe() == Ret && "Trying to fold non-trivial return block"); // If the return block contains nothing but the return and PHI's, @@ -837,7 +840,7 @@ BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB. if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall); - if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + if (!Change && BB->getFirstNonPHIOrDbgOrPseudoProbe() == Ret) Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall); MadeChange |= Change; } diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -551,6 +551,10 @@ LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) { + LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n"); + ++CurInst; + continue; } LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1980,7 +1980,10 @@ --MaxNumInstToLookAt; // Could be calling an instruction that affects memory like free(). - if (CurI.mayHaveSideEffects() && !isa(CurI)) + // Skip pseudo probe intrinsic calls which are not really killing any memory + // accesses. + if (CurI.mayHaveSideEffects() && !isa(CurI) && + !isa(CurI)) return nullptr; if (auto *SI = dyn_cast(&CurI)) { @@ -2073,6 +2076,14 @@ continue; } + // Skip pseudo probes. 
The consequence is we lose track of the branch + // probability for ThenBB, which is fine since the optimization here takes + // place regardless of the branch probability. + if (isa(I)) { + SpeculatedDbgIntrinsics.push_back(I); + continue; + } + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; @@ -2464,7 +2475,8 @@ } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I) && + !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. @@ -2477,7 +2489,8 @@ } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I) && + !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -666,6 +666,10 @@ cast(&I)->getIntrinsicID() == Intrinsic::sideeffect) { // Ignore llvm.sideeffect calls. + } else if (isa(&I) && + cast(&I)->getIntrinsicID() == + Intrinsic::pseudoprobe) { + // Ignore llvm.pseudoprobe calls. 
} else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) { LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n'); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7167,7 +7167,8 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || - ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect)) + ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect || + ID == Intrinsic::pseudoprobe)) return nullptr; auto willWiden = [&](unsigned VF) -> bool { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5122,7 +5122,9 @@ if (I->mayReadOrWriteMemory() && (!isa(I) || - cast(I)->getIntrinsicID() != Intrinsic::sideeffect)) { + (cast(I)->getIntrinsicID() != Intrinsic::sideeffect && + cast(I)->getIntrinsicID() != + Intrinsic::pseudoprobe))) { // Update the linked list of memory accessing instructions. if (CurrentLoadStore) { CurrentLoadStore->NextLoadStore = SD; diff --git a/llvm/test/Transforms/SampleProfile/emit-pseudo-probe.ll b/llvm/test/Transforms/SampleProfile/emit-pseudo-probe.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/emit-pseudo-probe.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -passes=pseudo-probe -function-sections -S -o %t +; RUN: FileCheck %s < %t --check-prefix=CHECK-IL +; RUN: llc %t -stop-after=instruction-select -o - | FileCheck %s --check-prefix=CHECK-MIR +; +;; Check the generation of pseudoprobe intrinsic call. 
+ +define void @foo(i32 %x) { +bb0: + %cmp = icmp eq i32 %x, 0 +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1) +; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0 + br i1 %cmp, label %bb1, label %bb2 + +bb1: +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2) +; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0 +; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0 + br label %bb3 + +bb2: +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3) +; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0 +; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0 + br label %bb3 + +bb3: +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4) + ret void +}