diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h --- a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h +++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h @@ -41,6 +41,7 @@ namespace llvm { +using namespace sampleprof; class MIRAddFSDiscriminators : public MachineFunctionPass { MachineFunction *MF; unsigned LowBit; @@ -48,10 +49,11 @@ public: static char ID; - /// FS bits that will be used in this pass (numbers are 0 based and - /// inclusive). - MIRAddFSDiscriminators(unsigned LowBit = 0, unsigned HighBit = 0) - : MachineFunctionPass(ID), LowBit(LowBit), HighBit(HighBit) { + /// P is the sequence number of this pass, starting from 1. + MIRAddFSDiscriminators(FSDiscriminatorPass P = FSDiscriminatorPass::Pass1) + : MachineFunctionPass(ID) { + LowBit = getFSPassBitBegin(P); + HighBit = getFSPassBitEnd(P); assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); } diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_PASSES_H #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Discriminator.h" #include #include @@ -490,9 +491,10 @@ /// Create IR Type Promotion pass. \see TypePromotion.cpp FunctionPass *createTypePromotionPass(); - /// Add Flow Sensitive Discriminators. - FunctionPass *createMIRAddFSDiscriminatorsPass(unsigned LowBit, - unsigned HighBit); + /// Add Flow Sensitive Discriminators. P specifies the + /// sequence number of this pass (starting from 1). + FunctionPass * + createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P); /// Creates MIR Debugify pass.
\see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -1741,7 +1741,7 @@ } /// Return the bits used for base discriminators. - static unsigned getBaseDiscriminatorBits() { return BASE_DIS_BIT_END; } + static unsigned getBaseDiscriminatorBits() { return getBaseFSBitEnd(); } /// Returns the base discriminator for a given encoded discriminator \p D. static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) { diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -190,7 +190,10 @@ SecFlagPartial = (1 << 0), /// SecFlagContext means this is context-sensitive profile for /// CSSPGO - SecFlagFullContext = (1 << 1) + SecFlagFullContext = (1 << 1), + /// SecFlagFSDiscriminator means this profile uses flow-sensitive + /// discriminators. + SecFlagFSDiscriminator = (1 << 2) }; enum class SecFuncMetadataFlags : uint32_t { @@ -891,6 +894,9 @@ /// Whether the profile contains any ".__uniq." suffix in a name. static bool HasUniqSuffix; + /// If this profile uses flow sensitive discriminators. + static bool ProfileIsFS; + /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for /// all the function symbols defined or declared in current module. 
DenseMap *GUIDToFuncNameMap = nullptr; diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -236,6 +236,7 @@ #include "llvm/ProfileData/GCOV.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Discriminator.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SymbolRemappingReader.h" @@ -350,6 +351,26 @@ /// Read and validate the file header. virtual std::error_code readHeader() = 0; + /// Set the bits for FS discriminators. Parameter P specifies the sequence + /// number: P == i is for the i-th round of adding FS discriminators, and + /// P == 0 is for using base discriminators. + void setDiscriminatorMaskedBitFrom(FSDiscriminatorPass P) { + MaskedBitFrom = getFSPassBitEnd(P); + } + /// Set the bits for using base discriminators. + void setBaseDiscriminatorMask() { + setDiscriminatorMaskedBitFrom(FSDiscriminatorPass::Base); + } + + /// Get the bitmask for the discriminators: For FS profiles, return the bit + /// mask for this pass. For non-FS profiles, return (unsigned) -1. + uint32_t getDiscriminatorMask() const { + if (!ProfileIsFS) + return 0xFFFFFFFF; + assert((MaskedBitFrom != 0) && "MaskedBitFrom is not set properly"); + return getN1Bits(MaskedBitFrom); + } + /// The interface to read sample profiles from the associated file. std::error_code read() { if (std::error_code EC = readImpl()) @@ -505,6 +526,9 @@ /// Number of context-sensitive profiles. uint32_t CSProfileCount = 0; + /// Whether the function profiles use FS discriminators. + bool ProfileIsFS = false; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; @@ -512,6 +536,10 @@ /// is used by compiler. If SampleProfileReader is used by other /// tools which are not compiler, M is usually nullptr.
const Module *M = nullptr; + + /// Zero out the discriminator bits higher than bit MaskedBitFrom (0 based). + /// The default is to keep all the bits. + uint32_t MaskedBitFrom = 31; }; class SampleProfileReaderText : public SampleProfileReader { diff --git a/llvm/include/llvm/Support/Discriminator.h b/llvm/include/llvm/Support/Discriminator.h --- a/llvm/include/llvm/Support/Discriminator.h +++ b/llvm/include/llvm/Support/Discriminator.h @@ -13,6 +13,9 @@ #ifndef LLVM_SUPPORT_DISCRIMINATOR_H #define LLVM_SUPPORT_DISCRIMINATOR_H +#include "llvm/Support/Error.h" +#include + // Utility functions for encoding / decoding discriminators. /// With a given unsigned int \p U, use up to 13 bits to represent it. /// old_bit 1~5 --> new_bit 1~5 @@ -48,26 +51,83 @@ } // Some constants used in FS Discriminators. -#define BASE_DIS_BIT_BEG 0 -#define BASE_DIS_BIT_END 7 +// +namespace llvm { +namespace sampleprof { +enum class FSDiscriminatorPass : unsigned { + Base = 0, + Pass0 = 0, + Pass1 = 1, + Pass2 = 2, + Pass3 = 3, + Pass4 = 4, + PassLast = 4, +}; +} // namespace sampleprof + +using namespace sampleprof; + +// The number of bits reserved for the base discriminator. The base +// discriminator starts from bit 0. +static const unsigned BaseDiscriminatorBitWidth = 8; + +// The number of bits reserved for each FS discriminator pass. +static const unsigned FSDiscriminatorBitWidth = 6; + +// Return the number of FS passes, excluding the pass adding the base +// discriminators. +// The number of passes for FS discriminators. Note that the total +// number of discriminator bits, i.e. +// BaseDiscriminatorBitWidth +// + FSDiscriminatorBitWidth * getNumFSPasses() +// needs to fit in an unsigned int type. +static inline unsigned getNumFSPasses() { + return static_cast(FSDiscriminatorPass::PassLast); +} + +// Return the ending bit for FSPass P.
+static inline unsigned getFSPassBitEnd(FSDiscriminatorPass P) { + unsigned I = static_cast(P); + assert(I <= getNumFSPasses() && "Invalid FS discriminator pass number."); + return BaseDiscriminatorBitWidth + I * FSDiscriminatorBitWidth - 1; +} + +// Return the beginning bit for FSPass P. +static inline unsigned getFSPassBitBegin(FSDiscriminatorPass P) { + if (P == FSDiscriminatorPass::Base) + return 0; + unsigned I = static_cast(P); + assert(I <= getNumFSPasses() && "Invalid FS discriminator pass number."); + return getFSPassBitEnd(static_cast(I - 1)) + 1; +} -#define PASS_1_DIS_BIT_BEG 8 -#define PASS_1_DIS_BIT_END 13 +// Return the beginning bit for the last FSPass. +static inline int getLastFSPassBitBegin() { + return getFSPassBitBegin(static_cast(getNumFSPasses())); +} -#define PASS_2_DIS_BIT_BEG 14 -#define PASS_2_DIS_BIT_END 19 +// Return the ending bit for the last FSPass. +static inline unsigned getLastFSPassBitEnd() { + return getFSPassBitEnd(static_cast(getNumFSPasses())); +} -#define PASS_3_DIS_BIT_BEG 20 -#define PASS_3_DIS_BIT_END 25 +// Return the beginning bit for the base (first) FSPass. +static inline unsigned getBaseFSBitBegin() { return 0; } -#define PASS_LAST_DIS_BIT_BEG 26 -#define PASS_LAST_DIS_BIT_END 31 +// Return the ending bit for the base (first) FSPass. +static inline unsigned getBaseFSBitEnd() { + return BaseDiscriminatorBitWidth - 1; +} -// Set bits range [0 .. n] to 1. Used in FS Discriminators. +// Set bits in range of [0 .. n] to 1. Used in FS Discriminators. static inline unsigned getN1Bits(int N) { - if (N >= 31) + // Work around a g++ bug that folds "(1U << (N + 1)) - 1" to 0.
+ if (N == 31) return 0xFFFFFFFF; - return (1 << (N + 1)) - 1; + assert((N < 32) && "N is invalid"); + return (1U << (N + 1)) - 1; } +} // namespace llvm + #endif /* LLVM_SUPPORT_DISCRIMINATOR_H */ diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp --- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp +++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -22,6 +22,7 @@ #include using namespace llvm; +using namespace sampleprof; #define DEBUG_TYPE "mirfs-discriminators" @@ -33,9 +34,8 @@ char &llvm::MIRAddFSDiscriminatorsID = MIRAddFSDiscriminators::ID; -FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(unsigned LowBit, - unsigned HighBit) { - return new MIRAddFSDiscriminators(LowBit, HighBit); +FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(FSDiscriminatorPass P) { + return new MIRAddFSDiscriminators(P); } // Compute a hash value using debug line number, and the line numbers from the diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1177,8 +1177,12 @@ addPass(&PatchableFunctionID); if (EnableFSDiscriminator && !FSNoFinalDiscrim) - addPass(createMIRAddFSDiscriminatorsPass(PASS_LAST_DIS_BIT_BEG, - PASS_LAST_DIS_BIT_END)); + // Add FS discriminators here so that all the instruction duplicates + // in different BBs get their own discriminators. With this, we can "sum" + // the SampleFDO counters instead of using MAX. This will improve the + // SampleFDO profile quality. 
+ addPass(createMIRAddFSDiscriminatorsPass( + sampleprof::FSDiscriminatorPass::PassLast)); addPreEmitPass(); diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -42,6 +42,7 @@ bool FunctionSamples::ProfileIsCS = false; bool FunctionSamples::UseMD5 = false; bool FunctionSamples::HasUniqSuffix = true; +bool FunctionSamples::ProfileIsFS = false; } // namespace sampleprof } // namespace llvm @@ -232,9 +233,15 @@ const DILocation *PrevDIL = DIL; for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { - S.push_back(std::make_pair( - LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), - PrevDIL->getScope()->getSubprogram()->getLinkageName())); + unsigned Discriminator; + if (ProfileIsFS) + Discriminator = DIL->getDiscriminator(); + else + Discriminator = DIL->getBaseDiscriminator(); + + S.push_back( + std::make_pair(LineLocation(getOffset(DIL), Discriminator), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); PrevDIL = DIL; } if (S.size() == 0) diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" @@ -45,6 +46,15 @@ using namespace llvm; using namespace sampleprof; +#define DEBUG_TYPE "samplepgo-reader" + +// This internal option specifies if the profile uses FS discriminators. +// It only applies to text, binary and compact binary format profiles. +// For ext-binary format profiles, the flag is set in the summary. 
+static cl::opt ProfileIsFSDisciminator( + "profile-isfs", cl::Hidden, cl::init(false), + cl::desc("Profile uses flow senstive discriminators")); + /// Dump the function profile for \p FName. /// /// \param FName Name of the function to print. @@ -238,6 +248,7 @@ // top-level function profile. bool SeenMetadata = false; + ProfileIsFS = ProfileIsFSDisciminator; for (; !LineIt.is_at_eof(); ++LineIt) { if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') continue; @@ -295,6 +306,10 @@ "Found non-metadata after metadata: " + *LineIt); return sampleprof_error::malformed; } + + // Here we handle FS discriminators. + Discriminator &= getDiscriminatorMask(); + while (InlineStack.size() > Depth) { InlineStack.pop_back(); } @@ -504,6 +519,9 @@ if (std::error_code EC = NumCalls.getError()) return EC; + // Here we handle FS discriminators: + uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); + for (uint32_t J = 0; J < *NumCalls; ++J) { auto CalledFunction(readStringFromTable()); if (std::error_code EC = CalledFunction.getError()) @@ -513,11 +531,11 @@ if (std::error_code EC = CalledFunctionSamples.getError()) return EC; - FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, *CalledFunction, *CalledFunctionSamples); } - FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); } // Read all the samples for inlined function calls. 
@@ -538,8 +556,11 @@ if (std::error_code EC = FName.getError()) return EC; + // Here we handle FS discriminators: + uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( - LineLocation(*LineOffset, *Discriminator))[std::string(*FName)]; + LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)]; CalleeProfile.setName(*FName); if (std::error_code EC = readProfile(CalleeProfile)) return EC; @@ -575,6 +596,7 @@ } std::error_code SampleProfileReaderBinary::readImpl() { + ProfileIsFS = ProfileIsFSDisciminator; while (!at_eof()) { if (std::error_code EC = readFuncProfile(Data)) return EC; @@ -595,6 +617,8 @@ Summary->setPartialProfile(true); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) FunctionSamples::ProfileIsCS = ProfileIsCS = true; + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) + FunctionSamples::ProfileIsFS = ProfileIsFS = true; break; case SecNameTable: { FixedLengthMD5 = @@ -860,7 +884,7 @@ // Collect functions used by current module if the Reader has been // given a module. bool LoadFuncsToBeUsed = collectFuncsFromModule(); - + ProfileIsFS = ProfileIsFSDisciminator; std::vector OffsetsToUse; if (!LoadFuncsToBeUsed) { // load all the function profiles. @@ -1105,6 +1129,8 @@ Flags.append("partial,"); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) Flags.append("context,"); + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) + Flags.append("fs-discriminator,"); break; default: break; @@ -1521,6 +1547,7 @@ /// This format is generated by the Linux Perf conversion tool at /// https://github.com/google/autofdo. std::error_code SampleProfileReaderGCC::readImpl() { + assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator"); // Read the string table. 
if (std::error_code EC = readNameTable()) return EC; diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -252,6 +252,8 @@ addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext); if (Type == SecFuncMetadata && FunctionSamples::ProfileIsCS) addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute); + if (Type == SecProfSummary && FunctionSamples::ProfileIsFS) + addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator); uint64_t SectionStart = markSectionStart(Type, LayoutIdx); switch (Type) { diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp --- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -167,6 +167,7 @@ return false; } Reader = std::move(ReaderOrErr.get()); + Reader->setBaseDiscriminatorMask(); Reader->read(); return true; } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1769,6 +1769,7 @@ // set module before reading the profile so reader may be able to only // read the function profiles which are used by the current module. 
Reader->setModule(&M); + Reader->setBaseDiscriminatorMask(); if (std::error_code EC = Reader->read()) { std::string Msg = "profile reading failed: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/fsafdo.extbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/fsafdo.extbinary.afdo new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ for.cond1.preheader probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +for.cond1.preheader: + %j.012 = phi i32 [ 0, %entry ], [ %inc11, %if.end9.3 ] + %mul = mul nuw nsw i32 %j.012, 48 + %call = tail call i32 @bar(i32 %mul), !dbg !32 + %0 = and i32 %call, 1, !dbg !33 + %tobool.not = icmp eq i32 %0, 0, !dbg !33 + br i1 %tobool.not, label %if.end, label %if.then, !dbg !35 +; CHECK: edge for.cond1.preheader -> if.end probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge for.cond1.preheader -> if.then probability is 0x409d9d48 / 0x80000000 = 50.48% + + +if.then: + %mul4 = shl nsw i32 %call, 1, !dbg !36 + tail call void @work(i32 %mul4), !dbg !37 + br label %if.end, !dbg !38 +; CHECK: edge if.then -> if.end probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end: + %1 = and i32 %call, 3, !dbg !39 + %tobool6.not = icmp eq i32 %1, 0, !dbg !39 + br i1 %tobool6.not, label %if.end9, label %if.then7, !dbg !40 +; CHECK: edge if.end -> if.end9 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end -> if.then7 probability is 0x5d39453d / 0x80000000 = 72.83% + + +if.then7: + %mul8 = mul nsw i32 %call, 3, !dbg !41 + tail call void @work(i32 %mul8), !dbg !42 + br label %if.end9, !dbg !43 +; CHECK: edge if.then7 -> if.end9 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9: + %add.1 = or i32 %mul, 1, !dbg !44 + %call.1 = tail call i32 @bar(i32 %add.1), !dbg !32 + %2 = and i32 %call.1, 1, !dbg !33 + %tobool.not.1 = 
icmp eq i32 %2, 0, !dbg !33 + br i1 %tobool.not.1, label %if.end.1, label %if.then.1, !dbg !35 +; CHECK: edge if.end9 -> if.end.1 probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge if.end9 -> if.then.1 probability is 0x409d9d48 / 0x80000000 = 50.48% + +for.end12: + ret void, !dbg !45 + +if.then.1: + %mul4.1 = shl nsw i32 %call.1, 1, !dbg !36 + tail call void @work(i32 %mul4.1), !dbg !37 + br label %if.end.1, !dbg !38 +; CHECK: edge if.then.1 -> if.end.1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end.1: + %3 = and i32 %call.1, 3, !dbg !39 + %tobool6.not.1 = icmp eq i32 %3, 0, !dbg !39 + br i1 %tobool6.not.1, label %if.end9.1, label %if.then7.1, !dbg !40 +; CHECK: edge if.end.1 -> if.end9.1 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end.1 -> if.then7.1 probability is 0x5d39453d / 0x80000000 = 72.83% + +if.then7.1: + %mul8.1 = mul nsw i32 %call.1, 3, !dbg !41 + tail call void @work(i32 %mul8.1), !dbg !42 + br label %if.end9.1, !dbg !43 +; CHECK: edge if.then7.1 -> if.end9.1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9.1: + %add.2 = or i32 %mul, 2, !dbg !44 + %call.2 = tail call i32 @bar(i32 %add.2), !dbg !32 + %4 = and i32 %call.2, 1, !dbg !33 + %tobool.not.2 = icmp eq i32 %4, 0, !dbg !33 + br i1 %tobool.not.2, label %if.end.2, label %if.then.2, !dbg !35 +; CHECK: edge if.end9.1 -> if.end.2 probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge if.end9.1 -> if.then.2 probability is 0x409d9d48 / 0x80000000 = 50.48% + +if.then.2: + %mul4.2 = shl nsw i32 %call.2, 1, !dbg !36 + tail call void @work(i32 %mul4.2), !dbg !37 + br label %if.end.2, !dbg !38 +; CHECK: edge if.then.2 -> if.end.2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end.2: + %5 = and i32 %call.2, 3, !dbg !39 + %tobool6.not.2 = icmp eq i32 %5, 0, !dbg !39 + br i1 %tobool6.not.2, label %if.end9.2, label %if.then7.2, !dbg !40 +; CHECK: edge if.end.2 -> if.end9.2 probability is 0x22c6bac3 / 
0x80000000 = 27.17% +; CHECK: edge if.end.2 -> if.then7.2 probability is 0x5d39453d / 0x80000000 = 72.83% + +if.then7.2: + %mul8.2 = mul nsw i32 %call.2, 3, !dbg !41 + tail call void @work(i32 %mul8.2), !dbg !42 + br label %if.end9.2, !dbg !43 +; CHECK: edge if.then7.2 -> if.end9.2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9.2: + %add.3 = or i32 %mul, 3, !dbg !44 + %call.3 = tail call i32 @bar(i32 %add.3), !dbg !32 + %6 = and i32 %call.3, 1, !dbg !33 + %tobool.not.3 = icmp eq i32 %6, 0, !dbg !33 + br i1 %tobool.not.3, label %if.end.3, label %if.then.3, !dbg !35 +; CHECK: edge if.end9.2 -> if.end.3 probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge if.end9.2 -> if.then.3 probability is 0x409d9d48 / 0x80000000 = 50.48% + +if.then.3: + %mul4.3 = shl nsw i32 %call.3, 1, !dbg !36 + tail call void @work(i32 %mul4.3), !dbg !37 + br label %if.end.3, !dbg !38 +; CHECK: edge if.then.3 -> if.end.3 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end.3: + %7 = and i32 %call.3, 3, !dbg !39 + %tobool6.not.3 = icmp eq i32 %7, 0, !dbg !39 + br i1 %tobool6.not.3, label %if.end9.3, label %if.then7.3, !dbg !40 +; CHECK: edge if.end.3 -> if.end9.3 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end.3 -> if.then7.3 probability is 0x5d39453d / 0x80000000 = 72.83% + +if.then7.3: + %mul8.3 = mul nsw i32 %call.3, 3, !dbg !41 + tail call void @work(i32 %mul8.3), !dbg !42 + br label %if.end9.3, !dbg !43 +; CHECK: edge if.then7.3 -> if.end9.3 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9.3: + %inc11 = add nuw nsw i32 %j.012, 1, !dbg !46 + %exitcond.not = icmp eq i32 %inc11, 48, !dbg !48 + br i1 %exitcond.not, label %for.end12, label %for.cond1.preheader, !dbg !30, !llvm.loop !49 +; CHECK: edge if.end9.3 -> for.end12 probability is 0x00834dd9 / 0x80000000 = 0.40% +; CHECK: edge if.end9.3 -> for.cond1.preheader probability is 0x7f7cb227 / 0x80000000 = 99.60% [HOT edge] +} + +define dso_local i32 
@main() #3 !dbg !52 { +entry: + br label %for.body, !dbg !53 + +for.body: + %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + tail call void @foo(), !dbg !55 + %inc = add nuw nsw i32 %i.03, 1, !dbg !56 + %exitcond.not = icmp eq i32 %inc, 10000000, !dbg !58 + br i1 %exitcond.not, label %for.end, label %for.body, !dbg !53, !llvm.loop !60 + +for.end: + ret i32 0, !dbg !63 +} + + +attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile"} +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, 
file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "unroll.c", directory: "a/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 4, column: 3, scope: !7) +!10 = !DILocation(line: 5, column: 5, scope: !7) +!11 = !{!12, !12, i64 0} +!12 = !{!"int", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 6, column: 10, scope: !7) +!16 = !DILocation(line: 7, column: 1, scope: !7) +!17 = !DILocation(line: 6, column: 3, scope: !18) +!18 = !DILexicalBlockFile(scope: !7, file: !1, discriminator: 1) +!19 = distinct !DISubprogram(name: "work", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!20 = !DILocation(line: 11, column: 7, scope: !19) +!21 = !DILocation(line: 11, column: 11, scope: !22) +!22 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 1) +!23 = !DILocation(line: 11, column: 11, scope: !24) +!24 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 2) +!25 = !DILocation(line: 11, column: 7, scope: !26) +!26 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 3) +!27 = !DILocation(line: 0, scope: !22) +!28 = !DILocation(line: 15, column: 1, scope: !19) +!29 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !8, scopeLine: 17, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) 
+!30 = !DILocation(line: 19, column: 3, scope: !31) +!31 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 2) +!32 = !DILocation(line: 21, column: 16, scope: !31) +!33 = !DILocation(line: 22, column: 14, scope: !34) +!34 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 1) +!35 = !DILocation(line: 22, column: 11, scope: !31) +!36 = !DILocation(line: 23, column: 16, scope: !29) +!37 = !DILocation(line: 23, column: 9, scope: !34) +!38 = !DILocation(line: 23, column: 9, scope: !31) +!39 = !DILocation(line: 24, column: 14, scope: !34) +!40 = !DILocation(line: 24, column: 11, scope: !31) +!41 = !DILocation(line: 25, column: 16, scope: !29) +!42 = !DILocation(line: 25, column: 9, scope: !34) +!43 = !DILocation(line: 25, column: 9, scope: !31) +!44 = !DILocation(line: 21, column: 21, scope: !34) +!45 = !DILocation(line: 27, column: 1, scope: !29) +!46 = !DILocation(line: 19, column: 24, scope: !47) +!47 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 3) +!48 = !DILocation(line: 19, column: 17, scope: !34) +!49 = distinct !{!49, !50, !51} +!50 = !DILocation(line: 19, column: 3, scope: !29) +!51 = !DILocation(line: 26, column: 3, scope: !29) +!52 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 29, type: !8, scopeLine: 29, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!53 = !DILocation(line: 31, column: 3, scope: !54) +!54 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 2) +!55 = !DILocation(line: 32, column: 5, scope: !52) +!56 = !DILocation(line: 31, column: 30, scope: !57) +!57 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 3) +!58 = !DILocation(line: 31, column: 17, scope: !59) +!59 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 1) +!60 = distinct !{!60, !61, !62} +!61 = !DILocation(line: 31, column: 3, scope: !52) +!62 = !DILocation(line: 33, column: 3, scope: !52) +!63 = !DILocation(line: 34, column: 1, scope: !52) diff 
--git a/llvm/unittests/ProfileData/SampleProfTest.cpp b/llvm/unittests/ProfileData/SampleProfTest.cpp --- a/llvm/unittests/ProfileData/SampleProfTest.cpp +++ b/llvm/unittests/ProfileData/SampleProfTest.cpp @@ -9,6 +9,7 @@ #include "llvm/ProfileData/SampleProf.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -61,6 +62,7 @@ ASSERT_TRUE(NoError(ReaderOrErr.getError())); Reader = std::move(ReaderOrErr.get()); Reader->setModule(&M); + Reader->setBaseDiscriminatorMask(); } TempFile createRemapFile() {