diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -25,6 +25,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ArrayRecycler.h" @@ -1159,7 +1160,7 @@ bool isPseudoProbe() const { return getOpcode() == TargetOpcode::PSEUDO_PROBE; } - + // True if the instruction represents a position in the function. bool isPosition() const { return isLabel() || isCFIInstruction(); } @@ -1800,6 +1801,17 @@ } } + PseudoProbeAttributes getPseudoProbeAttribute() const { + assert(isPseudoProbe() && "Must be a pseudo probe instruction"); + return (PseudoProbeAttributes)getOperand(3).getImm(); + } + + void addPseudoProbeAttribute(PseudoProbeAttributes Attr) { + assert(isPseudoProbe() && "Must be a pseudo probe instruction"); + MachineOperand &AttrOperand = getOperand(3); + AttrOperand.setImm(AttrOperand.getImm() | (uint32_t)Attr); + } + private: /// If this instruction is embedded into a MachineFunction, return the /// MachineRegisterInfo object for the current function, otherwise diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -27,6 +27,11 @@ enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall }; +enum class PseudoProbeAttributes { + Reserved = 0x1, // Reserved for future use. + Dangling = 0x2, // The probe is dangling. +}; + // The saturated distrution factor representing 100% for block probes. constexpr static uint64_t PseudoProbeFullDistributionFactor = std::numeric_limits::max(); diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -27,7 +27,7 @@ // TYPE (uint4) // 0 - block probe, 1 - indirect call, 2 - direct call // ATTRIBUTE (uint3) -// reserved +// 1 - reserved, 2 - dangling // ADDRESS_TYPE (uint1) // 0 - code address, 1 - address delta // CODE_ADDRESS (uint64 or ULEB128) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -359,14 +359,7 @@ /// Merge the samples in \p Other into this record. /// Optionally scale sample counts by \p Weight. - sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) { - sampleprof_error Result = addSamples(Other.getSamples(), Weight); - for (const auto &I : Other.getCallTargets()) { - MergeResult(Result, addCalledTarget(I.first(), I.second, Weight)); - } - return Result; - } - + sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1); void print(raw_ostream &OS, unsigned Indent) const; void dump() const; @@ -569,16 +562,15 @@ // For CSSPGO, in order to conserve profile size, we no longer write out // locations profile for those not hit during training, so we need to // treat them as zero instead of error here. - if (ProfileIsCS) - return 0; - return std::error_code(); - // A missing counter for a probe likely means the probe was not executed. - // Treat it as a zero count instead of an unknown count to help edge - // weight inference. - if (FunctionSamples::ProfileIsProbeBased) + if (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased) return 0; return std::error_code(); } else { + // Return error for an invalid sample count which is usually assigned to + // dangling probe. + if (FunctionSamples::ProfileIsProbeBased && + ret->second.getSamples() == FunctionSamples::InvalidProbeCount) + return std::error_code(); return ret->second.getSamples(); } } @@ -845,6 +837,10 @@ const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; + // The invalid sample count is used to represent samples collected for a + // dangling probe. + static constexpr uint64_t InvalidProbeCount = UINT64_MAX; + static bool ProfileIsProbeBased; static bool ProfileIsCS; diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp --- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp +++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/InitializePasses.h" +#include "llvm/MC/MCPseudoProbe.h" #include "llvm/Target/TargetMachine.h" #include @@ -47,7 +48,10 @@ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); bool Changed = false; for (MachineBasicBlock &MBB : MF) { + MachineInstr *FirstInstr = nullptr; for (MachineInstr &MI : MBB) { + if (!MI.isPseudo()) + FirstInstr = &MI; if (MI.isCall()) { if (DILocation *DL = MI.getDebugLoc()) { auto Value = DL->getDiscriminator(); @@ -65,6 +69,47 @@ } } } + + // Walk the block backwards, move PSEUDO_PROBE before the first real + // instruction to fix out-of-order probes. There is a problem with probes + // as the terminator of the block. During the offline counts processing, + // the samples collected on the first physical instruction following a + // probe will be counted towards the probe. This logically equals to + // treating the instruction next to a probe as if it is from the same + // block of the probe. This is accurate most of the time unless the + // instruction can be reached from multiple flows, which means it actually + // starts a new block. Samples collected on such probes may cause + // imprecision with the counts inference algorithm. Fortunately, if + // there are still other native instructions preceding the probe we can + // use them as a place holder to collect samples for the probe. + if (FirstInstr) { + auto MII = MBB.rbegin(); + while (MII != MBB.rend()) { + // Skip all pseudo probes followed by a real instruction since they + // are not dangling. + if (!MII->isPseudo()) + break; + auto Cur = MII++; + if (Cur->getOpcode() != TargetOpcode::PSEUDO_PROBE) + continue; + // Move the dangling probe before FirstInstr. + auto *ProbeInstr = &*Cur; + MBB.remove(ProbeInstr); + MBB.insert(FirstInstr, ProbeInstr); + Changed = true; + } + } else { + // Probes not surrounded by any real instructions in the same block are + // called dangling probes. Since there's no good way to pick up a sample + // collection point for dangling probes at compile time, they are being + // tagged so that the profile correlation tool will not report any + // samples collected for them and it's up to the counts inference tool + // to get them a reasonable count. + for (MachineInstr &MI : MBB) { + if (MI.isPseudoProbe()) + MI.addPseudoProbeAttribute(PseudoProbeAttributes::Dangling); + } + } } return Changed; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -112,6 +112,27 @@ return OS; } +/// Merge the samples in \p Other into this record. +/// Optionally scale sample counts by \p Weight. +sampleprof_error SampleRecord::merge(const SampleRecord &Other, + uint64_t Weight) { + sampleprof_error Result; + // With pseudo probes, merge a dangling sample with a non-dangling sample + // should result in a dangling sample. + if (FunctionSamples::ProfileIsProbeBased && + (getSamples() == FunctionSamples::InvalidProbeCount || + Other.getSamples() == FunctionSamples::InvalidProbeCount)) { + NumSamples = FunctionSamples::InvalidProbeCount; + Result = sampleprof_error::success; + } else { + Result = addSamples(Other.getSamples(), Weight); + } + for (const auto &I : Other.getCallTargets()) { + MergeResult(Result, addCalledTarget(I.first(), I.second, Weight)); + } + return Result; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void LineLocation::dump() const { print(dbgs()); } #endif diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangling.mir b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangling.mir new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangling.mir @@ -0,0 +1,27 @@ + +# REQUIRES: x86-registered-target +# Ensure llc can read and parse MIR pseudo probe operations. +# RUN: llc -mtriple x86_64-- -run-pass=pseudo-probe-inserter %s -o - | FileCheck %s + +# CHECK: PSEUDO_PROBE 6699318081062747564, 1, 0, 0 +# check probe 2 is moved before the test instruction. +# CHECK: PSEUDO_PROBE 6699318081062747564, 2, 0, 0 +# CHECK: TEST32rr +# check probe 3 is dangling. +# CHECK: PSEUDO_PROBE 6699318081062747564, 3, 0, 2 + +name: foo +body: | + bb.0: + TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags + PSEUDO_PROBE 6699318081062747564, 1, 0, 0 + JCC_1 %bb.1, 4, implicit $eflags + + bb.2: + TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags + PSEUDO_PROBE 6699318081062747564, 2, 0, 0 + + bb.1: + PSEUDO_PROBE 6699318081062747564, 3, 0, 0 + +... diff --git a/llvm/test/tools/llvm-profdata/Inputs/pseudo-probe-profile.proftext b/llvm/test/tools/llvm-profdata/Inputs/pseudo-probe-profile.proftext --- a/llvm/test/tools/llvm-profdata/Inputs/pseudo-probe-profile.proftext +++ b/llvm/test/tools/llvm-profdata/Inputs/pseudo-probe-profile.proftext @@ -1,7 +1,7 @@ foo:3200:13 1: 13 2: 7 - 3: 6 + 3: 18446744073709551615 4: 13 5: 7 _Z3foov:5 _Z3barv:2 6: 6 _Z3barv:4 _Z3foov:2 diff --git a/llvm/test/tools/llvm-profdata/merge-probe-profile.test b/llvm/test/tools/llvm-profdata/merge-probe-profile.test --- a/llvm/test/tools/llvm-profdata/merge-probe-profile.test +++ b/llvm/test/tools/llvm-profdata/merge-probe-profile.test @@ -1,11 +1,12 @@ # Tests for merge of probe-based profile files. +# Check the dangling probe 3 ends up with 18446744073709551615 (INT64_MAX), i.e, not aggregated. RUN: llvm-profdata merge --sample --text %p/Inputs/pseudo-probe-profile.proftext -o - | FileCheck %s --check-prefix=MERGE1 RUN: llvm-profdata merge --sample --extbinary %p/Inputs/pseudo-probe-profile.proftext -o %t && llvm-profdata merge --sample --text %t -o - | FileCheck %s --check-prefix=MERGE1 MERGE1: foo:3200:13 MERGE1: 1: 13 MERGE1: 2: 7 -MERGE1: 3: 6 +MERGE1: 3: 18446744073709551615 MERGE1: 4: 13 MERGE1: 5: 7 _Z3foov:5 _Z3barv:2 MERGE1: 6: 6 _Z3barv:4 _Z3foov:2 @@ -16,7 +17,7 @@ MERGE2: foo:6400:26 MERGE2: 1: 26 MERGE2: 2: 14 -MERGE2: 3: 12 +MERGE2: 3: 18446744073709551615 MERGE2: 4: 26 MERGE2: 5: 14 _Z3foov:10 _Z3barv:4 MERGE2: 6: 12 _Z3barv:8 _Z3foov:4