diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -650,7 +650,7 @@ /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 9 +#define INSTR_PROF_INDEX_VERSION 10 /* Coverage mapping format version (start from 0). */ #define INSTR_PROF_COVMAP_VERSION 5 @@ -663,6 +663,7 @@ * The 60th bit indicates single byte coverage instrumentation. * The 61st bit indicates function entry instrumentation only. * The 62nd bit indicates whether memory profile information is present. + * The 63rd bit indicates if this is a temporal profile. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) @@ -673,9 +674,11 @@ #define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60) #define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61) #define VARIANT_MASK_MEMPROF (0x1ULL << 62) +#define VARIANT_MASK_TEMPORAL_PROF (0x1ULL << 63) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias +#define INSTR_PROF_PROFILE_SET_TIMESTAMP __llvm_profile_set_timestamp /* The variable that holds the name of the profile data * specified via command line. 
*/
diff --git a/compiler-rt/lib/profile/InstrProfiling.c b/compiler-rt/lib/profile/InstrProfiling.c
--- a/compiler-rt/lib/profile/InstrProfiling.c
+++ b/compiler-rt/lib/profile/InstrProfiling.c
@@ -20,6 +20,18 @@
 #define INSTR_PROF_VALUE_PROF_DATA
 #include "profile/InstrProfData.inc"
 
+/* Monotonic counter that hands out first-call timestamps. It is file-local so
+ * the runtime does not export an extra symbol with default visibility. */
+static uint32_t __llvm_profile_global_timestamp = 1;
+
+/* Stamp *Probe with the next timestamp unless it already holds one. A probe
+ * is treated as unset while it is 0 or all ones. */
+COMPILER_RT_VISIBILITY
+void INSTR_PROF_PROFILE_SET_TIMESTAMP(uint64_t *Probe) {
+  if (*Probe == 0 || *Probe == (uint64_t)-1)
+    *Probe = __llvm_profile_global_timestamp++;
+}
+
 COMPILER_RT_VISIBILITY uint64_t __llvm_profile_get_magic(void) {
   return sizeof(void *) == sizeof(uint64_t) ? (INSTR_PROF_RAW_MAGIC_64)
                                             : (INSTR_PROF_RAW_MAGIC_32);
@@ -42,6 +54,9 @@
 }
 
 COMPILER_RT_VISIBILITY void __llvm_profile_reset_counters(void) {
+  if (__llvm_profile_get_version() & VARIANT_MASK_TEMPORAL_PROF)
+    __llvm_profile_global_timestamp = 1;
+
   char *I = __llvm_profile_begin_counters();
   char *E = __llvm_profile_end_counters();
 
diff --git a/compiler-rt/test/profile/instrprof-timestamp.c b/compiler-rt/test/profile/instrprof-timestamp.c
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-timestamp.c
@@ -0,0 +1,47 @@
+// RUN: rm -f %t.profdata
+// RUN: %clang_pgogen -o %t -mllvm -pgo-temporal-instrumentation %s
+// RUN: env LLVM_PROFILE_FILE=%t.0.profraw %run %t n
+// RUN: env LLVM_PROFILE_FILE=%t.1.profraw %run %t y
+// RUN: llvm-profdata merge -o %t.profdata %t.0.profraw %t.1.profraw
+// RUN: llvm-profdata show --function-traces %t.profdata | FileCheck %s --implicit-check-not=unused
+
+// RUN: rm -f %t.profdata
+// RUN: %clang_pgogen -o %t -mllvm -pgo-temporal-instrumentation -mllvm -pgo-block-coverage %s
+// RUN: env LLVM_PROFILE_FILE=%t.0.profraw %run %t n
+// RUN: env LLVM_PROFILE_FILE=%t.1.profraw %run %t y
+// RUN: llvm-profdata merge -o %t.profdata %t.0.profraw %t.1.profraw
+// RUN: llvm-profdata show --function-traces %t.profdata | FileCheck %s --implicit-check-not=unused
+
+extern void exit(int);
+extern void
__llvm_profile_reset_counters(); + +void a() {} +void b() {} +void unused() { exit(1); } +void c() {} + +int main(int argc, const char *argv[]) { + if (argc != 2) + unused(); + a(); + b(); + b(); + c(); + if (*argv[1] == 'y') + __llvm_profile_reset_counters(); + a(); + c(); + b(); + return 0; +} + +// CHECK: Function Traces (samples=2 seen=2): +// CHECK: Trace 0 (count=4): +// CHECK: main +// CHECK: a +// CHECK: b +// CHECK: c +// CHECK: Trace 1 (count=3): +// CHECK: a +// CHECK: c +// CHECK: b diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -13648,6 +13648,33 @@ """""""""" See description of '``llvm.instrprof.increment``' intrinsic. +'``llvm.instrprof.timestamp``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.instrprof.timestamp(i8* , i64 , + i32 , i32 ) + +Overview: +""""""""" + +The '``llvm.instrprof.timestamp``' intrinsic is used to implement function +timestamp instrumentation. + +Arguments: +"""""""""" +The arguments are the same as '``llvm.instrprof.increment``'. The ``index`` is +expected to always be zero. + +Semantics: +"""""""""" +Similar to the '``llvm.instrprof.increment``' intrinsic, but it stores a +timestamp representing when this function was executed for the first time. + '``llvm.instrprof.cover``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1410,6 +1410,17 @@ } }; +/// This represents the llvm.instrprof.timestamp intrinsic. 
+class InstrProfTimestampInst : public InstrProfInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_timestamp; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This represents the llvm.instrprof.value.profile intrinsic. class InstrProfValueProfileInst : public InstrProfInstBase { public: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -625,6 +625,10 @@ [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty]>; +// A timestamp for instrumentation based profiling. +def int_instrprof_timestamp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty, llvm_i32_ty]>; + // A call to profile runtime for value profiling of target expressions // through instrumentation based profiling. def int_instrprof_value_profile : Intrinsic<[], diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -300,7 +300,9 @@ FunctionEntryOnly = 0x20, // A memory profile collected using -fprofile=memory. MemProf = 0x40, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/MemProf) + // A temporal profile. + TemporalProfile = 0x80, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/TemporalProfile) }; const std::error_category &instrprof_category(); @@ -331,6 +333,10 @@ zlib_unavailable }; +/// An ordered list of functions identified by their NameRef found in +/// INSTR_PROF_DATA +using InstrProfTraceTy = std::vector; + inline std::error_code make_error_code(instrprof_error E) { return std::error_code(static_cast(E), instrprof_category()); } @@ -1052,7 +1058,9 @@ Version8 = 8, // Binary ids are added. Version9 = 9, - // The current version is 9. + // An additional (optional) function traces section is added. 
+ Version10 = 10, + // The current version is 10. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -1071,6 +1079,7 @@ uint64_t HashOffset; uint64_t MemProfOffset; uint64_t BinaryIdOffset; + uint64_t FunctionTracesOffset; // New fields should only be added at the end to ensure that the size // computation is correct. The methods below need to be updated to ensure that // the new field is read correctly. diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -650,7 +650,7 @@ /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 9 +#define INSTR_PROF_INDEX_VERSION 10 /* Coverage mapping format version (start from 0). */ #define INSTR_PROF_COVMAP_VERSION 5 @@ -663,6 +663,7 @@ * The 60th bit indicates single byte coverage instrumentation. * The 61st bit indicates function entry instrumentation only. * The 62nd bit indicates whether memory profile information is present. + * The 63rd bit indicates if this is a temporal profile. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) @@ -673,9 +674,11 @@ #define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60) #define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61) #define VARIANT_MASK_MEMPROF (0x1ULL << 62) +#define VARIANT_MASK_TEMPORAL_PROF (0x1ULL << 63) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias +#define INSTR_PROF_PROFILE_SET_TIMESTAMP __llvm_profile_set_timestamp /* The variable that holds the name of the profile data * specified via command line. 
*/ diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -135,6 +135,9 @@ /// Return true if profile includes a memory profile. virtual bool hasMemoryProfile() const = 0; + /// Return true if this is a temporal profile. + virtual bool hasTemporalProfile() const = 0; + /// Returns a BitsetEnum describing the attributes of the profile. To check /// individual attributes prefer using the helpers above. virtual InstrProfKind getProfileKind() const = 0; @@ -156,6 +159,10 @@ protected: std::unique_ptr Symtab; + /// A list of function traces. + SmallVector Traces; + /// The total number of function traces seen. + uint64_t TraceStreamSize = 0; /// Set the current error and return same. Error error(instrprof_error Err, const std::string &ErrMsg = "") { @@ -200,6 +207,13 @@ static Expected> create(std::unique_ptr Buffer, const InstrProfCorrelator *Correlator = nullptr); + + /// Returns a list of function traces. + virtual const SmallVector &getFunctionTraces() { + return Traces; + } + /// Returns the total number of function traces seen. + uint64_t getTraceStreamSize() { return TraceStreamSize; } }; /// Reader for the simple text based instrprof format. @@ -221,6 +235,8 @@ Error readValueProfileData(InstrProfRecord &Record); + Error readTraceData(); + public: TextInstrProfReader(std::unique_ptr DataBuffer_) : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} @@ -259,6 +275,10 @@ return false; } + bool hasTemporalProfile() const override { + return static_cast(ProfileKind & InstrProfKind::TemporalProfile); + } + InstrProfKind getProfileKind() const override { return ProfileKind; } /// Read the header. @@ -288,6 +308,8 @@ /// If available, this hold the ProfileData array used to correlate raw /// instrumentation data to their functions. 
const InstrProfCorrelatorImpl *Correlator; + /// A list of timestamps paired with a function name reference. + std::vector> FunctionTimestamps; bool ShouldSwapBytes; // The value of the version field of the raw profile data header. The lower 56 // bits specifies the format version and the most significant 8 bits specify @@ -359,6 +381,10 @@ return false; } + bool hasTemporalProfile() const override { + return (Version & VARIANT_MASK_TEMPORAL_PROF) != 0; + } + /// Returns a BitsetEnum describing the attributes of the raw instr profile. InstrProfKind getProfileKind() const override; @@ -367,6 +393,8 @@ return *Symtab.get(); } + const SmallVector &getFunctionTraces() override; + private: Error createSymtab(InstrProfSymtab &Symtab); Error readNextHeader(const char *CurrentPos); @@ -504,6 +532,7 @@ virtual bool hasSingleByteCoverage() const = 0; virtual bool functionEntryOnly() const = 0; virtual bool hasMemoryProfile() const = 0; + virtual bool hasTemporalProfile() const = 0; virtual InstrProfKind getProfileKind() const = 0; virtual Error populateSymtab(InstrProfSymtab &) = 0; }; @@ -574,6 +603,10 @@ return (FormatVersion & VARIANT_MASK_MEMPROF) != 0; } + bool hasTemporalProfile() const override { + return (FormatVersion & VARIANT_MASK_TEMPORAL_PROF) != 0; + } + InstrProfKind getProfileKind() const override; Error populateSymtab(InstrProfSymtab &Symtab) override { @@ -653,6 +686,10 @@ bool hasMemoryProfile() const override { return Index->hasMemoryProfile(); } + bool hasTemporalProfile() const override { + return Index->hasTemporalProfile(); + } + /// Returns a BitsetEnum describing the attributes of the indexed instr /// profile. 
InstrProfKind getProfileKind() const override { diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -25,6 +25,7 @@ #include "llvm/Support/Error.h" #include #include +#include namespace llvm { @@ -41,6 +42,15 @@ private: bool Sparse; StringMap FunctionData; + /// The maximum length of a single trace. + uint64_t MaxTraceLength; + /// The maximum number of stored traces. + uint64_t TraceReservoirSize; + /// The total number of function traces seen. + uint64_t TraceStreamSize = 0; + /// The list of traces. + SmallVector Traces; + std::mt19937 RNG; // A map to hold memprof data per function. The lower 64 bits obtained from // the md5 hash of the function name is used to index into the map. @@ -60,7 +70,8 @@ InstrProfRecordWriterTrait *InfoObj; public: - InstrProfWriter(bool Sparse = false); + InstrProfWriter(bool Sparse = false, uint64_t TraceReservoirSize = 0, + uint64_t MaxTraceLength = 0); ~InstrProfWriter(); StringMap &getProfileData() { return FunctionData; } @@ -74,6 +85,11 @@ addRecord(std::move(I), 1, Warn); } + /// Add \p SrcTraces using reservoir sampling where \p SrcTraceStreamSize is + /// the total number of traces the source has seen. + void addFunctionTraces(SmallVector SrcTraces, + uint64_t SrcTraceStreamSize); + /// Add a memprof record for a function identified by its \p Id. 
void addMemProfRecord(const GlobalValue::GUID Id, const memprof::IndexedMemProfRecord &Record); @@ -96,6 +112,9 @@ /// Write the profile in text format to \c OS Error writeText(raw_fd_ostream &OS); + /// Write function trace data to the header in text format to \c OS + void writeTextTraceData(raw_fd_ostream &OS, InstrProfSymtab &Symtab); + Error validateRecord(const InstrProfRecord &Func); /// Write \c Record in text format to \c OS @@ -158,6 +177,8 @@ void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I, uint64_t Weight, function_ref Warn); bool shouldEncodeData(const ProfilingData &PD); + /// Add \p Trace using reservoir sampling. + void addFunctionTrace(InstrProfTraceTy Trace); Error writeImpl(ProfOStream &OS); }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -95,6 +95,10 @@ /// Replace instrprof.cover with a store instruction to the coverage byte. void lowerCover(InstrProfCoverInst *Inc); + /// Replace instrprof.timestamp with a call to + /// INSTR_PROF_PROFILE_SET_TIMESTAMP. + void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction); + /// Replace instrprof.increment with an increment of the appropriate value. 
void lowerIncrement(InstrProfIncrementInst *Inc);
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7033,6 +7033,8 @@
     llvm_unreachable("instrprof failed to lower a cover");
   case Intrinsic::instrprof_increment:
     llvm_unreachable("instrprof failed to lower an increment");
+  case Intrinsic::instrprof_timestamp:
+    llvm_unreachable("instrprof failed to lower a timestamp");
   case Intrinsic::instrprof_value_profile:
     llvm_unreachable("instrprof failed to lower a value profiling call");
   case Intrinsic::localescape: {
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1376,9 +1376,13 @@
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
-        IndexedInstrProf::ProfVersion::CurrentVersion == Version9,
+        IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
+  case 10ull:
+    H.FunctionTracesOffset =
+        read(Buffer, offsetOf(&Header::FunctionTracesOffset));
+    [[fallthrough]];
   case 9ull:
     H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset));
     [[fallthrough]];
@@ -1398,10 +1402,13 @@
     // When a new field is added to the header add a case statement here to
     // compute the size as offset of the new field + size of the new field. This
     // relies on the field being added to the end of the list.
- static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version9, + static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version10, "Please update the size computation below if a new field has " "been added to the header, if not add a case statement to " "fall through to the latest version."); + case 10ull: + return offsetOf(&Header::FunctionTracesOffset) + + sizeof(Header::FunctionTracesOffset); case 9ull: return offsetOf(&Header::BinaryIdOffset) + sizeof(Header::BinaryIdOffset); case 8ull: diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -60,6 +60,9 @@ if (Version & VARIANT_MASK_MEMPROF) { ProfileKind |= InstrProfKind::MemProf; } + if (Version & VARIANT_MASK_TEMPORAL_PROF) { + ProfileKind |= InstrProfKind::TemporalProfile; + } return ProfileKind; } @@ -264,13 +267,49 @@ ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; else if (Str.equals_insensitive("not_entry_first")) ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation; - else + else if (Str.equals_insensitive("traces")) { + ProfileKind |= InstrProfKind::TemporalProfile; + if (auto Err = readTraceData()) + return error(std::move(Err)); + } else return error(instrprof_error::bad_header); ++Line; } return success(); } +/// Trace data is stored in the header immediately after `:traces`. The first +/// integer is NumTraces, the second integer is TraceStreamSize, then the +/// following lines are the actual traces which consist of a comma separated +/// list of function names. 
+Error TextInstrProfReader::readTraceData() {
+  if ((++Line).is_at_end())
+    return error(instrprof_error::eof);
+
+  uint32_t NumTraces;
+  if (Line->getAsInteger(0, NumTraces))
+    return error(instrprof_error::malformed);
+
+  if ((++Line).is_at_end())
+    return error(instrprof_error::eof);
+
+  if (Line->getAsInteger(0, TraceStreamSize))
+    return error(instrprof_error::malformed);
+
+  for (uint32_t i = 0; i < NumTraces; i++) {
+    if ((++Line).is_at_end())
+      return error(instrprof_error::eof);
+
+    InstrProfTraceTy Trace;
+    SmallVector<StringRef> FuncNames;
+    Line->split(FuncNames, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+    for (auto &FuncName : FuncNames)
+      Trace.push_back(IndexedInstrProf::ComputeHash(FuncName.trim()));
+    Traces.push_back(std::move(Trace));
+  }
+  return success();
+}
+
 Error
 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
 
@@ -398,6 +437,22 @@
   return getProfileKindFromVersion(Version);
 }
 
+template <class IntPtrT>
+const SmallVector<InstrProfTraceTy> &
+RawInstrProfReader<IntPtrT>::getFunctionTraces() {
+  if (FunctionTimestamps.empty()) {
+    assert(Traces.empty());
+    return Traces;
+  }
+  // Sort functions by their timestamps to build the trace.
+  llvm::sort(FunctionTimestamps);
+  InstrProfTraceTy Trace;
+  for (auto &[TimestampValue, NameRef] : FunctionTimestamps)
+    Trace.push_back(NameRef);
+  Traces = {std::move(Trace)};
+  return Traces;
+}
+
 template <class IntPtrT>
 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
   if (DataBuffer.getBufferSize() < sizeof(uint64_t))
@@ -582,6 +637,22 @@
   for (uint32_t I = 0; I < NumCounters; I++) {
     const char *Ptr =
         CountersStart + CounterBaseOffset + I * getCounterTypeSize();
+    if (I == 0 && hasTemporalProfile()) {
+      uint64_t TimestampValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
+      if (TimestampValue != 0 &&
+          TimestampValue != std::numeric_limits<uint64_t>::max()) {
+        FunctionTimestamps.emplace_back(TimestampValue, swap(Data->NameRef));
+        TraceStreamSize = 1;
+      }
+      if (hasSingleByteCoverage()) {
+        // In coverage mode, getCounterTypeSize() returns 1 byte but our
+        // timestamp field has size uint64_t. Increment I so that the next
+        // iteration of this for loop points to the byte after the timestamp
+        // field, i.e., I += 8.
+        I += 7;
+      }
+      continue;
+    }
     if (hasSingleByteCoverage()) {
       // A value of zero signifies the block is covered.
       Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
@@ -632,7 +703,7 @@
     if (Error E = readNextHeader(getNextHeaderPos()))
       return error(std::move(E));
 
-  // Read name ad set it in Record.
+  // Read name and set it in Record.
if (Error E = readName(Record))
     return error(std::move(E));
 
@@ -1061,6 +1132,45 @@
                                         "corrupted binary ids");
   }
 
+  if (GET_VERSION(Header->formatVersion()) >= 10 &&
+      Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
+    uint64_t FunctionTracesOffset =
+        endian::byte_swap<uint64_t, little>(Header->FunctionTracesOffset);
+    const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
+    // The offset comes straight from the file, so make sure it lands inside
+    // the buffer before turning it into a pointer.
+    if (FunctionTracesOffset > static_cast<uint64_t>(PtrEnd - Start))
+      return error(instrprof_error::truncated);
+    const unsigned char *Ptr = Start + FunctionTracesOffset;
+    // All size checks below compare against the bytes remaining so that a
+    // corrupted count cannot overflow the pointer arithmetic.
+    // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
+    if (static_cast<uint64_t>(PtrEnd - Ptr) < 2 * sizeof(uint64_t))
+      return error(instrprof_error::truncated);
+    const uint64_t NumTraces =
+        support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+    TraceStreamSize =
+        support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+    for (uint64_t i = 0; i < NumTraces; i++) {
+      // Expect at least one 64 bit field: NumFunctions
+      if (static_cast<uint64_t>(PtrEnd - Ptr) < sizeof(uint64_t))
+        return error(instrprof_error::truncated);
+      const uint64_t NumFunctions =
+          support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+      // Expect at least NumFunctions 64 bit fields
+      if (NumFunctions > static_cast<uint64_t>(PtrEnd - Ptr) / sizeof(uint64_t))
+        return error(instrprof_error::truncated);
+      InstrProfTraceTy Trace;
+      Trace.reserve(NumFunctions);
+      for (uint64_t j = 0; j < NumFunctions; j++) {
+        const uint64_t NameRef =
+            support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+        Trace.push_back(NameRef);
+      }
+      Traces.push_back(std::move(Trace));
+    }
+  }
+
   // Load the remapping table now if requested.
if (RemappingBuffer) { Remapper = diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" @@ -171,8 +172,11 @@ } // end namespace llvm -InstrProfWriter::InstrProfWriter(bool Sparse) - : Sparse(Sparse), InfoObj(new InstrProfRecordWriterTrait()) {} +InstrProfWriter::InstrProfWriter(bool Sparse, uint64_t TraceReservoirSize, + uint64_t MaxTraceLength) + : Sparse(Sparse), MaxTraceLength(MaxTraceLength), + TraceReservoirSize(TraceReservoirSize), + InfoObj(new InstrProfRecordWriterTrait()) {} InstrProfWriter::~InstrProfWriter() { delete InfoObj; } @@ -285,6 +289,59 @@ llvm::append_range(BinaryIds, BIs); } +void InstrProfWriter::addFunctionTrace(InstrProfTraceTy Trace) { + if (Trace.size() > MaxTraceLength) + Trace.resize(MaxTraceLength); + if (Trace.empty()) + return; + + if (TraceStreamSize < TraceReservoirSize) { + // Simply append the trace if we have not yet hit our reservoir size limit. + Traces.push_back(std::move(Trace)); + } else { + // Otherwise, replace a random trace in the stream. + std::uniform_int_distribution Distribution(0, TraceStreamSize); + uint64_t RandomIndex = Distribution(RNG); + if (RandomIndex < Traces.size()) + Traces[RandomIndex] = std::move(Trace); + } + ++TraceStreamSize; +} + +void InstrProfWriter::addFunctionTraces(SmallVector SrcTraces, + uint64_t SrcTraceStreamSize) { + // Assume that the source has the same reservoir size as the destination to + // avoid needing to record it in the indexed profile format. 
+ bool IsDestSampled = (TraceStreamSize > TraceReservoirSize); + bool IsSrcSampled = (SrcTraceStreamSize > TraceReservoirSize); + if (!IsDestSampled && IsSrcSampled) { + // If one of the traces are sampled, ensure that it belongs to Dest. + std::swap(Traces, SrcTraces); + std::swap(TraceStreamSize, SrcTraceStreamSize); + std::swap(IsDestSampled, IsSrcSampled); + } + if (!IsSrcSampled) { + // If the source stream is not sampled, we add each source trace normally. + for (auto &Trace : SrcTraces) + addFunctionTrace(std::move(Trace)); + return; + } + // Otherwise, we find the traces that would have been removed if we added + // the whole source stream. + SmallSetVector IndicesToReplace; + for (uint64_t I = 0; I < SrcTraceStreamSize; I++) { + std::uniform_int_distribution Distribution(0, TraceStreamSize); + uint64_t RandomIndex = Distribution(RNG); + if (RandomIndex < Traces.size()) + IndicesToReplace.insert(RandomIndex); + ++TraceStreamSize; + } + // Then we insert a random sample of the source traces. 
+ llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG); + for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces)) + Traces[Index] = std::move(Trace); +} + void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref Warn) { for (auto &I : IPW.FunctionData) @@ -295,6 +352,8 @@ for (auto &I : IPW.BinaryIds) addBinaryIds(I); + addFunctionTraces(std::move(IPW.Traces), IPW.TraceStreamSize); + MemProfFrameData.reserve(IPW.MemProfFrameData.size()); for (auto &I : IPW.MemProfFrameData) { // If we weren't able to add the frame mappings then it doesn't make sense @@ -370,18 +429,21 @@ Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY; if (static_cast(ProfileKind & InstrProfKind::MemProf)) Header.Version |= VARIANT_MASK_MEMPROF; + if (static_cast(ProfileKind & InstrProfKind::TemporalProfile)) + Header.Version |= VARIANT_MASK_TEMPORAL_PROF; Header.Unused = 0; Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; Header.MemProfOffset = 0; Header.BinaryIdOffset = 0; + Header.FunctionTracesOffset = 0; int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); - // Only write out all the fields except 'HashOffset', 'MemProfOffset' and - // 'BinaryIdOffset'. We need to remember the offset of these fields to allow - // back patching later. - for (int I = 0; I < N - 3; I++) + // Only write out all the fields except 'HashOffset', 'MemProfOffset', + // 'BinaryIdOffset' and `FunctionTracesOffset`. We need to remember the offset + // of these fields to allow back patching later. + for (int I = 0; I < N - 4; I++) OS.write(reinterpret_cast(&Header)[I]); // Save the location of Header.HashOffset field in \c OS. @@ -402,6 +464,9 @@ // profile contains binary ids. OS.write(0); + uint64_t FunctionTracesOffset = OS.tell(); + OS.write(0); + // Reserve space to write profile summary data. 
uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); @@ -515,6 +580,26 @@ OS.writeByte(0); } + uint64_t FunctionTracesSectionStart = 0; + if (static_cast(ProfileKind & InstrProfKind::TemporalProfile)) { + FunctionTracesSectionStart = OS.tell(); + // { + // NumTraces: u64 + // TraceStreamSize: u64 + // Traces[NumTraces]: { + // NumFunctions: u64 + // Trace[NumFunctions]: u64 + // } + // } + OS.write(Traces.size()); + OS.write(TraceStreamSize); + for (auto &Trace : Traces) { + OS.write(Trace.size()); + for (auto &NameRef : Trace) + OS.write(NameRef); + } + } + // Allocate space for data to be serialized out. std::unique_ptr TheSummary = IndexedInstrProf::allocSummary(SummarySize); @@ -542,6 +627,8 @@ {MemProfSectionOffset, &MemProfSectionStart, 1}, // Patch the Header.BinaryIdSectionOffset. {BinaryIdSectionOffset, &BinaryIdSectionStart, 1}, + // Patch the Header.FunctionTracesOffset (=0 for profiles without traces). + {FunctionTracesOffset, &FunctionTracesSectionStart, 1}, // Patch the summary data. 
{SummaryOffset, reinterpret_cast(TheSummary.get()), (int)(SummarySize / sizeof(uint64_t))}, @@ -664,6 +751,9 @@ } } + if (static_cast(ProfileKind & InstrProfKind::TemporalProfile)) + writeTextTraceData(OS, Symtab); + llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) { return std::tie(A.first, A.second.first) < std::tie(B.first, B.second.first); @@ -683,3 +773,16 @@ return Error::success(); } + +void InstrProfWriter::writeTextTraceData(raw_fd_ostream &OS, + InstrProfSymtab &Symtab) { + OS << ":traces\n"; + OS << "# Num Traces:\n" << Traces.size() << "\n"; + OS << "# Trace Stream Size:\n" << TraceStreamSize << "\n"; + for (auto &Trace : Traces) { + for (auto &NameRef : Trace) + OS << Symtab.getFuncName(NameRef) << ","; + OS << "\n"; + } + OS << "\n"; +} diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -421,6 +421,9 @@ } else if (auto *IPI = dyn_cast(&Instr)) { lowerIncrement(IPI); MadeChange = true; + } else if (auto *IPC = dyn_cast(&Instr)) { + lowerTimestamp(IPC); + MadeChange = true; } else if (auto *IPC = dyn_cast(&Instr)) { lowerCover(IPC); MadeChange = true; @@ -510,6 +513,7 @@ return containsIntrinsic(llvm::Intrinsic::instrprof_cover) || containsIntrinsic(llvm::Intrinsic::instrprof_increment) || containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) || + containsIntrinsic(llvm::Intrinsic::instrprof_timestamp) || containsIntrinsic(llvm::Intrinsic::instrprof_value_profile); } @@ -670,6 +674,9 @@ auto *Counters = getOrCreateRegionCounters(I); IRBuilder<> Builder(I); + if (isa(I)) + Counters->setAlignment(Align(8)); + auto *Addr = Builder.CreateConstInBoundsGEP2_32( Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue()); @@ -711,6 +718,21 @@ CoverInstruction->eraseFromParent(); } +void InstrProfiling::lowerTimestamp( + 
InstrProfTimestampInst *TimestampInstruction) { + assert(TimestampInstruction->getIndex()->isZeroValue() && + "timestamp probes are always the first probe for a function"); + auto &Ctx = M->getContext(); + auto *TimestampAddr = getCounterAddress(TimestampInstruction); + IRBuilder<> Builder(TimestampInstruction); + auto *CalleeTy = + FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false); + auto Callee = M->getOrInsertFunction( + INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy); + Builder.CreateCall(Callee, {TimestampAddr}); + TimestampInstruction->eraseFromParent(); +} + void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { auto *Addr = getCounterAddress(Inc); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -280,6 +280,10 @@ cl::desc("Create a dot file of CFGs with block " "coverage inference information")); +static cl::opt PGOTemporalInstrumentation( + "pgo-temporal-instrumentation", + cl::desc("Use this option to enable temporal instrumentation")); + static cl::opt PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use.")); @@ -397,6 +401,8 @@ VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; if (PGOBlockCoverage) ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE; + if (PGOTemporalInstrumentation) + ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF; auto IRLevelVersionVariable = new GlobalVariable( M, IntTy64, true, GlobalValue::WeakAnyLinkage, Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); @@ -924,6 +930,18 @@ InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); uint32_t I = 0; + if (PGOTemporalInstrumentation) { + NumCounters += PGOBlockCoverage ? 
8 : 1; + auto &EntryBB = F.getEntryBlock(); + IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); + // llvm.instrprof.timestamp(i8* , i64 , i32 , + // i32 ) + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp), + {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)}); + I += PGOBlockCoverage ? 8 : 1; + } + for (auto *InstrBB : InstrumentBBs) { IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); assert(Builder.GetInsertPoint() != InstrBB->end() && diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -passes=instrprof -S | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +@__profn_foo = private constant [3 x i8] c"foo" +; CHECK: @__profc_foo = private global [9 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF", section "__llvm_prf_cnts", comdat, align 8 + +define void @_Z3foov() { + call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 0) + ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) + call void @llvm.instrprof.cover(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 8) + ret void +} + +declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32) +declare void @llvm.instrprof.cover(i8*, i64, i32, i32) diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -passes=instrprof -S | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +@__profn_foo = private constant [3 x i8] c"foo" +; CHECK: @__profc_foo = private global [2 x 
i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 + +define void @_Z3foov() { + call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 0) + ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 1) + ret void +} + +declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32) +declare void @llvm.instrprof.increment(i8*, i64, i32, i32) diff --git a/llvm/test/Transforms/PGOProfile/timestamp.ll b/llvm/test/Transforms/PGOProfile/timestamp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/timestamp.ll @@ -0,0 +1,12 @@ +; RUN: opt < %s -passes=pgo-instr-gen -pgo-temporal-instrumentation -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-gen -pgo-temporal-instrumentation -pgo-block-coverage -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo() { +entry: + ; CHECK: call void @llvm.instrprof.timestamp({{.*}}) + ret void +} + +; CHECK: declare void @llvm.instrprof.timestamp( diff --git a/llvm/test/tools/llvm-profdata/merge-traces.proftext b/llvm/test/tools/llvm-profdata/merge-traces.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/merge-traces.proftext @@ -0,0 +1,49 @@ +# RUN: llvm-profdata merge --trace-reservoir-size=2 %s -o %t.profdata +# RUN: llvm-profdata show --function-traces %t.profdata | FileCheck %s --check-prefixes=SAMPLE1,SEEN1 +# RUN: llvm-profdata merge --trace-reservoir-size=2 %s %t.profdata -o %t.profdata +# RUN: llvm-profdata show --function-traces %t.profdata | FileCheck %s --check-prefixes=SAMPLE2,SEEN2 +# RUN: llvm-profdata merge --trace-reservoir-size=2 %s %t.profdata -o %t.profdata +# RUN: llvm-profdata show --function-traces 
%t.profdata | FileCheck %s --check-prefixes=SAMPLE2,SEEN3 +# RUN: llvm-profdata merge --trace-reservoir-size=2 %s %t.profdata -o %t.profdata +# RUN: llvm-profdata show --function-traces %t.profdata | FileCheck %s --check-prefixes=SAMPLE2,SEEN4 + +# SEEN1: Function Traces (samples=1 seen=1): +# SEEN2: Function Traces (samples=2 seen=2): +# SEEN3: Function Traces (samples=2 seen=3): +# SEEN4: Function Traces (samples=2 seen=4): +# SAMPLE1: Trace 0 (count=3): +# SAMPLE1: a +# SAMPLE1: b +# SAMPLE1: c +# SAMPLE2: Trace 1 (count=3): +# SAMPLE2: a +# SAMPLE2: b +# SAMPLE2: c + +# Header +:ir +:traces +# Num Traces +1 +# Trace Stream Size: +1 +a, b, c + + +a +# Func Hash: +0x1234 +# Num Counters: +1 +# Counter Values: +101 + +b +0x5678 +1 +202 + +c +0xabcd +1 +303 diff --git a/llvm/test/tools/llvm-profdata/read-traces.proftext b/llvm/test/tools/llvm-profdata/read-traces.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/read-traces.proftext @@ -0,0 +1,46 @@ +# RUN: llvm-profdata merge -text %s -o %t.1.proftext +# RUN: llvm-profdata merge -binary %t.1.proftext -o %t.2.profdata +# RUN: llvm-profdata merge -text %t.2.profdata -o %t.3.proftext +# RUN: diff %t.1.proftext %t.3.proftext + +# RUN: llvm-profdata show --function-traces %t.1.proftext | FileCheck %s + +# CHECK: Function Traces (samples=3 seen=3): +# CHECK: Trace 0 (count=3): +# CHECK: foo +# CHECK: bar +# CHECK: goo +# CHECK: Trace 1 (count=3): +# CHECK: foo +# CHECK: goo +# CHECK: bar +# CHECK: Trace 2 (count=1): +# CHECK: goo + +:ir +:traces +# Num Traces: +3 +# Trace Stream Size: +3 +foo, bar, goo +foo,goo,bar, +goo + +foo +# Func Hash: +0x1234 +# Num Counters: +1 +# Counter Values: +101 + +bar +0x5678 +1 +202 + +goo +0xabcd +1 +303 diff --git a/llvm/test/tools/llvm-profdata/trace-limit.proftext b/llvm/test/tools/llvm-profdata/trace-limit.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/trace-limit.proftext @@ -0,0 +1,41 @@ +# RUN: llvm-profdata merge 
--max-trace-length=0 %s -o %t.profdata +# RUN: llvm-profdata show --function-traces %t.profdata | FileCheck %s --check-prefix=NONE + +# RUN: llvm-profdata merge --max-trace-length=2 %s -o %t.profdata +# RUN: llvm-profdata show --function-traces %t.profdata | FileCheck %s --check-prefixes=CHECK,SOME + +# RUN: llvm-profdata merge --max-trace-length=1000 %s -o %t.profdata +# RUN: llvm-profdata show --function-traces %t.profdata | FileCheck %s --check-prefixes=CHECK,ALL + +# NONE: Function Traces (samples=0 seen=0): +# CHECK: Function Traces (samples=1 seen=1): +# SOME: Trace 0 (count=2): +# ALL: Trace 0 (count=3): + +# Header +:ir +:traces +# Num Traces +1 +# Trace Stream Size: +1 +a, b, c + + +a +# Func Hash: +0x1234 +# Num Counters: +1 +# Counter Values: +101 + +b +0x5678 +1 +202 + +c +0xabcd +1 +303 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -216,9 +216,10 @@ SmallSet &WriterErrorCodes; WriterContext(bool IsSparse, std::mutex &ErrLock, - SmallSet &WriterErrorCodes) - : Writer(IsSparse), ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) { - } + SmallSet &WriterErrorCodes, + uint64_t TraceReservoirSize = 0, uint64_t MaxTraceLength = 0) + : Writer(IsSparse, TraceReservoirSize, MaxTraceLength), ErrLock(ErrLock), + WriterErrorCodes(WriterErrorCodes) {} }; /// Computer the overlap b/w profile BaseFilename and TestFileName, @@ -304,7 +305,7 @@ auto FS = vfs::getRealFileSystem(); auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator); if (Error E = ReaderOrErr.takeError()) { - // Skip the empty profiles by returning sliently. + // Skip the empty profiles by returning silently. 
instrprof_error IPE = InstrProfError::take(std::move(E)); if (IPE != instrprof_error::empty_raw_profile) WC->Errors.emplace_back(make_error(IPE), Filename); @@ -342,6 +343,11 @@ }); } + if (Reader->hasTemporalProfile()) { + auto &Traces = Reader->getFunctionTraces(); + if (!Traces.empty()) + WC->Writer.addFunctionTraces(Traces, Reader->getTraceStreamSize()); + } if (Reader->hasError()) { if (Error E = Reader->getError()) WC->Errors.emplace_back(std::move(E), Filename); @@ -392,13 +398,13 @@ } } -static void mergeInstrProfile(const WeightedFileVector &Inputs, - StringRef DebugInfoFilename, - SymbolRemapper *Remapper, - StringRef OutputFilename, - ProfileFormat OutputFormat, bool OutputSparse, - unsigned NumThreads, FailureMode FailMode, - const StringRef ProfiledBinary) { +static void +mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename, + SymbolRemapper *Remapper, StringRef OutputFilename, + ProfileFormat OutputFormat, uint64_t TraceReservoirSize, + uint64_t MaxTraceLength, bool OutputSparse, + unsigned NumThreads, FailureMode FailMode, + const StringRef ProfiledBinary) { if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary && OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text) exitWithError("unknown format is specified"); @@ -424,7 +430,8 @@ SmallVector, 4> Contexts; for (unsigned I = 0; I < NumThreads; ++I) Contexts.emplace_back(std::make_unique( - OutputSparse, ErrorLock, WriterErrorCodes)); + OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize, + MaxTraceLength)); if (NumThreads == 1) { for (const auto &Input : Inputs) @@ -1264,6 +1271,15 @@ "drop-profile-symbol-list", cl::init(false), cl::Hidden, cl::desc("Drop the profile symbol list when merging AutoFDO profiles " "(only meaningful for -sample)")); + // WARNING: This reservoir size value is propagated to any input indexed + // profiles for simplicity. Changing this value between invocations could + // result in sample bias. 
+ cl::opt TraceReservoirSize( + "trace-reservoir-size", cl::init(100), + cl::desc("The maximum number of stored traces (default: 100)")); + cl::opt MaxTraceLength( + "max-trace-length", cl::init(10000), + cl::desc("The maximum length of a single trace (default: 10000)")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -1305,8 +1321,9 @@ if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(), - OutputFilename, OutputFormat, OutputSparse, NumThreads, - FailureMode, ProfiledBinary); + OutputFilename, OutputFormat, TraceReservoirSize, + MaxTraceLength, OutputSparse, NumThreads, FailureMode, + ProfiledBinary); else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, @@ -2392,8 +2409,8 @@ uint64_t ValueCutoff, bool OnlyListBelow, const std::string &ShowFunction, bool TextFormat, bool ShowBinaryIds, bool ShowCovered, - bool ShowProfileVersion, ShowFormat SFormat, - raw_fd_ostream &OS) { + bool ShowProfileVersion, bool ShowFunctionTraces, + ShowFormat SFormat, raw_fd_ostream &OS) { if (SFormat == ShowFormat::Json) exitWithError("JSON output is not supported for instr profiles"); if (SFormat == ShowFormat::Yaml) @@ -2610,6 +2627,18 @@ if (ShowProfileVersion) OS << "Profile version: " << Reader->getVersion() << "\n"; + + if (ShowFunctionTraces) { + auto &Traces = Reader->getFunctionTraces(); + OS << "Function Traces (samples=" << Traces.size() + << " seen=" << Reader->getTraceStreamSize() << "):\n"; + for (unsigned i = 0; i < Traces.size(); i++) { + OS << " Trace " << i << " (count=" << Traces[i].size() << "):\n"; + for (auto &NameRef : Traces[i]) + OS << " " << Reader->getSymtab().getFuncName(NameRef) << "\n"; + } + } + return 0; } @@ -2945,6 +2974,8 @@ "extbinary format")); cl::opt ShowBinaryIds("binary-ids", cl::init(false), cl::desc("Show binary ids in the profile. 
")); + cl::opt ShowFunctionTraces( + "function-traces", cl::desc("Show function traces in the profile.")); cl::opt DebugInfoFilename( "debug-info", cl::init(""), cl::desc("Read and extract profile metadata from debug info and show " @@ -2989,8 +3020,8 @@ Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction, - TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion, SFormat, - OS); + TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion, + ShowFunctionTraces, SFormat, OS); if (ProfileKind == sample) return showSampleProfile(Filename, ShowCounts, TopNFunctions, ShowAllFunctions, ShowDetailedSummary, diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -22,6 +22,9 @@ #include using namespace llvm; +using ::testing::IsSubsetOf; +using ::testing::SizeIs; +using ::testing::UnorderedElementsAre; [[nodiscard]] static ::testing::AssertionResult ErrorEquals(instrprof_error Expected, Error E) { @@ -224,6 +227,85 @@ ASSERT_EQ(0U, R->Counts[1]); } +TEST_F(InstrProfTest, test_merge_traces_truncated) { + uint64_t TraceReservoirSize = 10; + uint64_t MaxTraceLength = 2; + InstrProfWriter Writer(/*Sparse=*/false, TraceReservoirSize, MaxTraceLength); + ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::TemporalProfile), + Succeeded()); + + auto LargeTrace = {IndexedInstrProf::ComputeHash("foo"), + IndexedInstrProf::ComputeHash("bar"), + IndexedInstrProf::ComputeHash("goo")}; + auto SmallTrace = {IndexedInstrProf::ComputeHash("foo"), + IndexedInstrProf::ComputeHash("bar")}; + + Writer.addFunctionTraces({LargeTrace, SmallTrace}, 2); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + ASSERT_TRUE(Reader->hasTemporalProfile()); + 
EXPECT_EQ(Reader->getTraceStreamSize(), 2ull); + EXPECT_THAT(Reader->getFunctionTraces(), + UnorderedElementsAre(SmallTrace, SmallTrace)); +} + +TEST_F(InstrProfTest, test_merge_traces_from_writer) { + uint64_t TraceReservoirSize = 10; + uint64_t MaxTraceLength = 10; + InstrProfWriter Writer(/*Sparse=*/false, TraceReservoirSize, MaxTraceLength); + InstrProfWriter Writer2(/*Sparse=*/false, TraceReservoirSize, MaxTraceLength); + ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::TemporalProfile), + Succeeded()); + ASSERT_THAT_ERROR(Writer2.mergeProfileKind(InstrProfKind::TemporalProfile), + Succeeded()); + + auto FooTrace = {IndexedInstrProf::ComputeHash("foo")}; + auto BarTrace = {IndexedInstrProf::ComputeHash("bar")}; + + Writer.addFunctionTraces({FooTrace}, 1); + Writer2.addFunctionTraces({BarTrace}, 1); + Writer.mergeRecordsFromWriter(std::move(Writer2), Err); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + ASSERT_TRUE(Reader->hasTemporalProfile()); + EXPECT_EQ(Reader->getTraceStreamSize(), 2ull); + EXPECT_THAT(Reader->getFunctionTraces(), + UnorderedElementsAre(FooTrace, BarTrace)); +} + +TEST_F(InstrProfTest, test_merge_traces_sampled) { + uint64_t TraceReservoirSize = 3; + uint64_t MaxTraceLength = 10; + InstrProfWriter Writer(/*Sparse=*/false, TraceReservoirSize, MaxTraceLength); + ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::TemporalProfile), + Succeeded()); + + auto FooTrace = {IndexedInstrProf::ComputeHash("foo")}; + auto BarTrace = {IndexedInstrProf::ComputeHash("bar")}; + auto GooTrace = {IndexedInstrProf::ComputeHash("Goo")}; + + // Add some sampled traces + Writer.addFunctionTraces({FooTrace, BarTrace, GooTrace}, 5); + // Add some unsampled traces + Writer.addFunctionTraces({BarTrace, GooTrace}, 2); + Writer.addFunctionTraces({FooTrace}, 1); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + ASSERT_TRUE(Reader->hasTemporalProfile()); + EXPECT_EQ(Reader->getTraceStreamSize(), 
8ull); + // Check that we have a subset of all the traces we added + EXPECT_THAT(Reader->getFunctionTraces(), SizeIs(TraceReservoirSize)); + EXPECT_THAT( + Reader->getFunctionTraces(), + IsSubsetOf({FooTrace, BarTrace, GooTrace, BarTrace, GooTrace, FooTrace})); +} + using ::llvm::memprof::IndexedMemProfRecord; using ::llvm::memprof::MemInfoBlock; using FrameIdMapTy = @@ -526,7 +608,7 @@ N, T); ASSERT_FALSE(Res); - // Remove the MD_prof metadata + // Remove the MD_prof metadata Inst->setMetadata(LLVMContext::MD_prof, 0); // Annotate 5 records this time. annotateValueSite(*M, *Inst, R.get(), IPVK_IndirectCallTarget, 0, 5); @@ -546,7 +628,7 @@ ASSERT_EQ(2000U, ValueData[4].Value); ASSERT_EQ(2U, ValueData[4].Count); - // Remove the MD_prof metadata + // Remove the MD_prof metadata Inst->setMetadata(LLVMContext::MD_prof, 0); // Annotate with 4 records. InstrProfValueData VD0Sorted[] = {{1000, 6}, {2000, 5}, {3000, 4}, {4000, 3},