Index: llvm/include/llvm/IR/DebugInfoMetadata.h =================================================================== --- llvm/include/llvm/IR/DebugInfoMetadata.h +++ llvm/include/llvm/IR/DebugInfoMetadata.h @@ -1599,254 +1599,6 @@ } }; -/// Debug location. -/// -/// A debug location in source code, used for debug info and otherwise. -class DILocation : public MDNode { - friend class LLVMContextImpl; - friend class MDNode; - - DILocation(LLVMContext &C, StorageType Storage, unsigned Line, - unsigned Column, ArrayRef MDs, bool ImplicitCode); - ~DILocation() { dropAllReferences(); } - - static DILocation *getImpl(LLVMContext &Context, unsigned Line, - unsigned Column, Metadata *Scope, - Metadata *InlinedAt, bool ImplicitCode, - StorageType Storage, bool ShouldCreate = true); - static DILocation *getImpl(LLVMContext &Context, unsigned Line, - unsigned Column, DILocalScope *Scope, - DILocation *InlinedAt, bool ImplicitCode, - StorageType Storage, bool ShouldCreate = true) { - return getImpl(Context, Line, Column, static_cast(Scope), - static_cast(InlinedAt), ImplicitCode, Storage, - ShouldCreate); - } - - TempDILocation cloneImpl() const { - // Get the raw scope/inlinedAt since it is possible to invoke this on - // a DILocation containing temporary metadata. - return getTemporary(getContext(), getLine(), getColumn(), getRawScope(), - getRawInlinedAt(), isImplicitCode()); - } - -public: - // Disallow replacing operands. - void replaceOperandWith(unsigned I, Metadata *New) = delete; - - DEFINE_MDNODE_GET(DILocation, - (unsigned Line, unsigned Column, Metadata *Scope, - Metadata *InlinedAt = nullptr, bool ImplicitCode = false), - (Line, Column, Scope, InlinedAt, ImplicitCode)) - DEFINE_MDNODE_GET(DILocation, - (unsigned Line, unsigned Column, DILocalScope *Scope, - DILocation *InlinedAt = nullptr, - bool ImplicitCode = false), - (Line, Column, Scope, InlinedAt, ImplicitCode)) - - /// Return a (temporary) clone of this. 
- TempDILocation clone() const { return cloneImpl(); } - - unsigned getLine() const { return SubclassData32; } - unsigned getColumn() const { return SubclassData16; } - DILocalScope *getScope() const { return cast(getRawScope()); } - - DILocation *getInlinedAt() const { - return cast_or_null(getRawInlinedAt()); - } - - /// Check if the location corresponds to an implicit code. - /// When the ImplicitCode flag is true, it means that the Instruction - /// with this DILocation has been added by the front-end but it hasn't been - /// written explicitly by the user (e.g. cleanup stuff in C++ put on a closing - /// bracket). It's useful for code coverage to not show a counter on "empty" - /// lines. - bool isImplicitCode() const { return SubclassData1; } - void setImplicitCode(bool ImplicitCode) { SubclassData1 = ImplicitCode; } - - DIFile *getFile() const { return getScope()->getFile(); } - StringRef getFilename() const { return getScope()->getFilename(); } - StringRef getDirectory() const { return getScope()->getDirectory(); } - std::optional getSource() const { return getScope()->getSource(); } - - /// Get the scope where this is inlined. - /// - /// Walk through \a getInlinedAt() and return \a getScope() from the deepest - /// location. - DILocalScope *getInlinedAtScope() const { - if (auto *IA = getInlinedAt()) - return IA->getInlinedAtScope(); - return getScope(); - } - - /// Get the DWARF discriminator. - /// - /// DWARF discriminators distinguish identical file locations between - /// instructions that are on different basic blocks. - /// - /// There are 3 components stored in discriminator, from lower bits: - /// - /// Base discriminator: assigned by AddDiscriminators pass to identify IRs - /// that are defined by the same source line, but - /// different basic blocks. - /// Duplication factor: assigned by optimizations that will scale down - /// the execution frequency of the original IR. - /// Copy Identifier: assigned by optimizations that clones the IR. 
- /// Each copy of the IR will be assigned an identifier. - /// - /// Encoding: - /// - /// The above 3 components are encoded into a 32bit unsigned integer in - /// order. If the lowest bit is 1, the current component is empty, and the - /// next component will start in the next bit. Otherwise, the current - /// component is non-empty, and its content starts in the next bit. The - /// value of each components is either 5 bit or 12 bit: if the 7th bit - /// is 0, the bit 2~6 (5 bits) are used to represent the component; if the - /// 7th bit is 1, the bit 2~6 (5 bits) and 8~14 (7 bits) are combined to - /// represent the component. Thus, the number of bits used for a component - /// is either 0 (if it and all the next components are empty); 1 - if it is - /// empty; 7 - if its value is up to and including 0x1f (lsb and msb are both - /// 0); or 14, if its value is up to and including 0x1ff. Note that the last - /// component is also capped at 0x1ff, even in the case when both first - /// components are 0, and we'd technically have 29 bits available. - /// - /// For precise control over the data being encoded in the discriminator, - /// use encodeDiscriminator/decodeDiscriminator. - - inline unsigned getDiscriminator() const; - - // For the regular discriminator, it stands for all empty components if all - // the lowest 3 bits are non-zero and all higher 29 bits are unused(zero by - // default). Here we fully leverage the higher 29 bits for pseudo probe use. - // This is the format: - // [2:0] - 0x7 - // [31:3] - pseudo probe fields guaranteed to be non-zero as a whole - // So if the lower 3 bits is non-zero and the others has at least one - // non-zero bit, it guarantees to be a pseudo probe discriminator - inline static bool isPseudoProbeDiscriminator(unsigned Discriminator) { - return ((Discriminator & 0x7) == 0x7) && (Discriminator & 0xFFFFFFF8); - } - - /// Returns a new DILocation with updated \p Discriminator. 
- inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const; - - /// Returns a new DILocation with updated base discriminator \p BD. Only the - /// base discriminator is set in the new DILocation, the other encoded values - /// are elided. - /// If the discriminator cannot be encoded, the function returns std::nullopt. - inline std::optional - cloneWithBaseDiscriminator(unsigned BD) const; - - /// Returns the duplication factor stored in the discriminator, or 1 if no - /// duplication factor (or 0) is encoded. - inline unsigned getDuplicationFactor() const; - - /// Returns the copy identifier stored in the discriminator. - inline unsigned getCopyIdentifier() const; - - /// Returns the base discriminator stored in the discriminator. - inline unsigned getBaseDiscriminator() const; - - /// Returns a new DILocation with duplication factor \p DF * current - /// duplication factor encoded in the discriminator. The current duplication - /// factor is as defined by getDuplicationFactor(). - /// Returns std::nullopt if encoding failed. - inline std::optional - cloneByMultiplyingDuplicationFactor(unsigned DF) const; - - /// When two instructions are combined into a single instruction we also - /// need to combine the original locations into a single location. - /// When the locations are the same we can use either location. - /// When they differ, we need a third location which is distinct from either. - /// If they share a common scope, use this scope and compare the line/column - /// pair of the locations with the common scope: - /// * if both match, keep the line and column; - /// * if only the line number matches, keep the line and set the column as 0; - /// * otherwise set line and column as 0. - /// If they do not share a common scope the location is ambiguous and can't be - /// represented in a line entry. In this case, set line and column as 0 and - /// use the scope of any location. - /// - /// \p LocA \p LocB: The locations to be merged. 
- static const DILocation *getMergedLocation(const DILocation *LocA, - const DILocation *LocB); - - /// Try to combine the vector of locations passed as input in a single one. - /// This function applies getMergedLocation() repeatedly left-to-right. - /// - /// \p Locs: The locations to be merged. - static const DILocation * - getMergedLocations(ArrayRef Locs); - - /// Return the masked discriminator value for an input discrimnator value D - /// (i.e. zero out the (B+1)-th and above bits for D (B is 0-base). - // Example: an input of (0x1FF, 7) returns 0xFF. - static unsigned getMaskedDiscriminator(unsigned D, unsigned B) { - return (D & getN1Bits(B)); - } - - /// Return the bits used for base discriminators. - static unsigned getBaseDiscriminatorBits() { return getBaseFSBitEnd(); } - - /// Returns the base discriminator for a given encoded discriminator \p D. - static unsigned - getBaseDiscriminatorFromDiscriminator(unsigned D, - bool IsFSDiscriminator = false) { - if (IsFSDiscriminator) - return getMaskedDiscriminator(D, getBaseDiscriminatorBits()); - return getUnsignedFromPrefixEncoding(D); - } - - /// Raw encoding of the discriminator. APIs such as cloneWithDuplicationFactor - /// have certain special case behavior (e.g. treating empty duplication factor - /// as the value '1'). - /// This API, in conjunction with cloneWithDiscriminator, may be used to - /// encode the raw values provided. - /// - /// \p BD: base discriminator - /// \p DF: duplication factor - /// \p CI: copy index - /// - /// The return is std::nullopt if the values cannot be encoded in 32 bits - - /// for example, values for BD or DF larger than 12 bits. Otherwise, the - /// return is the encoded value. - static std::optional encodeDiscriminator(unsigned BD, unsigned DF, - unsigned CI); - - /// Raw decoder for values in an encoded discriminator D. 
- static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF, - unsigned &CI); - - /// Returns the duplication factor for a given encoded discriminator \p D, or - /// 1 if no value or 0 is encoded. - static unsigned getDuplicationFactorFromDiscriminator(unsigned D) { - if (EnableFSDiscriminator) - return 1; - D = getNextComponentInDiscriminator(D); - unsigned Ret = getUnsignedFromPrefixEncoding(D); - if (Ret == 0) - return 1; - return Ret; - } - - /// Returns the copy identifier for a given encoded discriminator \p D. - static unsigned getCopyIdentifierFromDiscriminator(unsigned D) { - return getUnsignedFromPrefixEncoding( - getNextComponentInDiscriminator(getNextComponentInDiscriminator(D))); - } - - Metadata *getRawScope() const { return getOperand(0); } - Metadata *getRawInlinedAt() const { - if (getNumOperands() == 2) - return getOperand(1); - return nullptr; - } - - static bool classof(const Metadata *MD) { - return MD->getMetadataID() == DILocationKind; - } -}; - /// Subprogram description. class DISubprogram : public DILocalScope { friend class LLVMContextImpl; @@ -2115,6 +1867,266 @@ } }; +/// Debug location. +/// +/// A debug location in source code, used for debug info and otherwise. 
+class DILocation : public MDNode { + friend class LLVMContextImpl; + friend class MDNode; + + DILocation(LLVMContext &C, StorageType Storage, unsigned Line, + unsigned Column, ArrayRef MDs, bool ImplicitCode); + ~DILocation() { dropAllReferences(); } + + static DILocation *getImpl(LLVMContext &Context, unsigned Line, + unsigned Column, Metadata *Scope, + Metadata *InlinedAt, bool ImplicitCode, + StorageType Storage, bool ShouldCreate = true); + static DILocation *getImpl(LLVMContext &Context, unsigned Line, + unsigned Column, DILocalScope *Scope, + DILocation *InlinedAt, bool ImplicitCode, + StorageType Storage, bool ShouldCreate = true) { + return getImpl(Context, Line, Column, static_cast(Scope), + static_cast(InlinedAt), ImplicitCode, Storage, + ShouldCreate); + } + + TempDILocation cloneImpl() const { + // Get the raw scope/inlinedAt since it is possible to invoke this on + // a DILocation containing temporary metadata. + return getTemporary(getContext(), getLine(), getColumn(), getRawScope(), + getRawInlinedAt(), isImplicitCode()); + } + +public: + // Disallow replacing operands. + void replaceOperandWith(unsigned I, Metadata *New) = delete; + + DEFINE_MDNODE_GET(DILocation, + (unsigned Line, unsigned Column, Metadata *Scope, + Metadata *InlinedAt = nullptr, bool ImplicitCode = false), + (Line, Column, Scope, InlinedAt, ImplicitCode)) + DEFINE_MDNODE_GET(DILocation, + (unsigned Line, unsigned Column, DILocalScope *Scope, + DILocation *InlinedAt = nullptr, + bool ImplicitCode = false), + (Line, Column, Scope, InlinedAt, ImplicitCode)) + + /// Return a (temporary) clone of this. + TempDILocation clone() const { return cloneImpl(); } + + unsigned getLine() const { return SubclassData32; } + unsigned getColumn() const { return SubclassData16; } + DILocalScope *getScope() const { return cast(getRawScope()); } + + /// Return the linkage name of Subprogram. If the linkage name is empty, + /// return scope name (the demangled name). 
+ StringRef getName() const { + DISubprogram *SP = getScope()->getSubprogram(); + if (!SP) + return ""; + auto Name = SP->getLinkageName(); + if (!Name.empty()) + return Name; + return SP->getName(); + } + + DILocation *getInlinedAt() const { + return cast_or_null(getRawInlinedAt()); + } + + /// Check if the location corresponds to an implicit code. + /// When the ImplicitCode flag is true, it means that the Instruction + /// with this DILocation has been added by the front-end but it hasn't been + /// written explicitly by the user (e.g. cleanup stuff in C++ put on a closing + /// bracket). It's useful for code coverage to not show a counter on "empty" + /// lines. + bool isImplicitCode() const { return SubclassData1; } + void setImplicitCode(bool ImplicitCode) { SubclassData1 = ImplicitCode; } + + DIFile *getFile() const { return getScope()->getFile(); } + StringRef getFilename() const { return getScope()->getFilename(); } + StringRef getDirectory() const { return getScope()->getDirectory(); } + std::optional getSource() const { return getScope()->getSource(); } + + /// Get the scope where this is inlined. + /// + /// Walk through \a getInlinedAt() and return \a getScope() from the deepest + /// location. + DILocalScope *getInlinedAtScope() const { + if (auto *IA = getInlinedAt()) + return IA->getInlinedAtScope(); + return getScope(); + } + + /// Get the DWARF discriminator. + /// + /// DWARF discriminators distinguish identical file locations between + /// instructions that are on different basic blocks. + /// + /// There are 3 components stored in discriminator, from lower bits: + /// + /// Base discriminator: assigned by AddDiscriminators pass to identify IRs + /// that are defined by the same source line, but + /// different basic blocks. + /// Duplication factor: assigned by optimizations that will scale down + /// the execution frequency of the original IR. + /// Copy Identifier: assigned by optimizations that clones the IR.
+ /// Each copy of the IR will be assigned an identifier. + /// + /// Encoding: + /// + /// The above 3 components are encoded into a 32bit unsigned integer in + /// order. If the lowest bit is 1, the current component is empty, and the + /// next component will start in the next bit. Otherwise, the current + /// component is non-empty, and its content starts in the next bit. The + /// value of each component is either 5 bits or 12 bits: if the 7th bit + /// is 0, the bit 2~6 (5 bits) are used to represent the component; if the + /// 7th bit is 1, the bit 2~6 (5 bits) and 8~14 (7 bits) are combined to + /// represent the component. Thus, the number of bits used for a component + /// is either 0 (if it and all the next components are empty); 1 - if it is + /// empty; 7 - if its value is up to and including 0x1f (lsb and msb are both + /// 0); or 14, if its value is up to and including 0xfff. Note that the last + /// component is also capped at 0xfff, even in the case when both first + /// components are 0, and we'd technically have 29 bits available. + /// + /// For precise control over the data being encoded in the discriminator, + /// use encodeDiscriminator/decodeDiscriminator. + + inline unsigned getDiscriminator() const; + + // For the regular discriminator, it stands for all empty components if all + // the lowest 3 bits are non-zero and all higher 29 bits are unused(zero by + // default). Here we fully leverage the higher 29 bits for pseudo probe use. + // This is the format: + // [2:0] - 0x7 + // [31:3] - pseudo probe fields guaranteed to be non-zero as a whole + // So if the lower 3 bits are all set and the others have at least one + // non-zero bit, it is guaranteed to be a pseudo probe discriminator + inline static bool isPseudoProbeDiscriminator(unsigned Discriminator) { + return ((Discriminator & 0x7) == 0x7) && (Discriminator & 0xFFFFFFF8); + } + + /// Returns a new DILocation with updated \p Discriminator.
+ inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const; + + /// Returns a new DILocation with updated base discriminator \p BD. Only the + /// base discriminator is set in the new DILocation, the other encoded values + /// are elided. + /// If the discriminator cannot be encoded, the function returns std::nullopt. + inline std::optional + cloneWithBaseDiscriminator(unsigned BD) const; + + /// Returns the duplication factor stored in the discriminator, or 1 if no + /// duplication factor (or 0) is encoded. + inline unsigned getDuplicationFactor() const; + + /// Returns the copy identifier stored in the discriminator. + inline unsigned getCopyIdentifier() const; + + /// Returns the base discriminator stored in the discriminator. + inline unsigned getBaseDiscriminator() const; + + /// Returns a new DILocation with duplication factor \p DF * current + /// duplication factor encoded in the discriminator. The current duplication + /// factor is as defined by getDuplicationFactor(). + /// Returns std::nullopt if encoding failed. + inline std::optional + cloneByMultiplyingDuplicationFactor(unsigned DF) const; + + /// When two instructions are combined into a single instruction we also + /// need to combine the original locations into a single location. + /// When the locations are the same we can use either location. + /// When they differ, we need a third location which is distinct from either. + /// If they share a common scope, use this scope and compare the line/column + /// pair of the locations with the common scope: + /// * if both match, keep the line and column; + /// * if only the line number matches, keep the line and set the column as 0; + /// * otherwise set line and column as 0. + /// If they do not share a common scope the location is ambiguous and can't be + /// represented in a line entry. In this case, set line and column as 0 and + /// use the scope of any location. + /// + /// \p LocA \p LocB: The locations to be merged. 
+ static const DILocation *getMergedLocation(const DILocation *LocA, + const DILocation *LocB); + + /// Try to combine the vector of locations passed as input in a single one. + /// This function applies getMergedLocation() repeatedly left-to-right. + /// + /// \p Locs: The locations to be merged. + static const DILocation * + getMergedLocations(ArrayRef Locs); + + /// Return the masked discriminator value for an input discriminator value D + /// (i.e. zero out the (B+1)-th and above bits for D; B is 0-based). + // Example: an input of (0x1FF, 7) returns 0xFF. + static unsigned getMaskedDiscriminator(unsigned D, unsigned B) { + return (D & getN1Bits(B)); + } + + /// Return the bits used for base discriminators. + static unsigned getBaseDiscriminatorBits() { return getBaseFSBitEnd(); } + + /// Returns the base discriminator for a given encoded discriminator \p D. + static unsigned + getBaseDiscriminatorFromDiscriminator(unsigned D, + bool IsFSDiscriminator = false) { + if (IsFSDiscriminator) + return getMaskedDiscriminator(D, getBaseDiscriminatorBits()); + return getUnsignedFromPrefixEncoding(D); + } + + /// Raw encoding of the discriminator. APIs such as cloneWithDuplicationFactor + /// have certain special case behavior (e.g. treating empty duplication factor + /// as the value '1'). + /// This API, in conjunction with cloneWithDiscriminator, may be used to + /// encode the raw values provided. + /// + /// \p BD: base discriminator + /// \p DF: duplication factor + /// \p CI: copy index + /// + /// The return is std::nullopt if the values cannot be encoded in 32 bits - + /// for example, values for BD or DF larger than 12 bits. Otherwise, the + /// return is the encoded value. + static std::optional encodeDiscriminator(unsigned BD, unsigned DF, + unsigned CI); + + /// Raw decoder for values in an encoded discriminator D.
+ static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF, + unsigned &CI); + + /// Returns the duplication factor for a given encoded discriminator \p D, or + /// 1 if no value or 0 is encoded. + static unsigned getDuplicationFactorFromDiscriminator(unsigned D) { + if (EnableFSDiscriminator) + return 1; + D = getNextComponentInDiscriminator(D); + unsigned Ret = getUnsignedFromPrefixEncoding(D); + if (Ret == 0) + return 1; + return Ret; + } + + /// Returns the copy identifier for a given encoded discriminator \p D. + static unsigned getCopyIdentifierFromDiscriminator(unsigned D) { + return getUnsignedFromPrefixEncoding( + getNextComponentInDiscriminator(getNextComponentInDiscriminator(D))); + } + + Metadata *getRawScope() const { return getOperand(0); } + Metadata *getRawInlinedAt() const { + if (getNumOperands() == 2) + return getOperand(1); + return nullptr; + } + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DILocationKind; + } +}; + class DILexicalBlockBase : public DILocalScope { protected: DILexicalBlockBase(LLVMContext &C, unsigned ID, StorageType Storage, Index: llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp =================================================================== --- llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -32,11 +32,7 @@ SmallVector ReversedInlineStack; auto *InlinedAt = DebugLoc ? DebugLoc->getInlinedAt() : nullptr; while (InlinedAt) { - const DISubprogram *SP = InlinedAt->getScope()->getSubprogram(); - // Use linkage name for C++ if possible. - auto Name = SP->getLinkageName(); - if (Name.empty()) - Name = SP->getName(); + auto Name = InlinedAt->getName(); // Use caching to avoid redundant md5 computation for build speed. 
uint64_t &CallerGuid = NameGuidMap[Name]; if (!CallerGuid) Index: llvm/lib/CodeGen/MIRFSDiscriminator.cpp =================================================================== --- llvm/lib/CodeGen/MIRFSDiscriminator.cpp +++ llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -30,6 +30,13 @@ #define DEBUG_TYPE "mirfs-discriminators" +// TODO(xur): Remove this option and related code once we make true as the +// default. +cl::opt ImprovedFSDiscriminator( + "improved-fs-discriminator", cl::Hidden, cl::init(false), + cl::desc("New FS discriminators encoding (incompatible with the original " + "encoding)")); + char MIRAddFSDiscriminators::ID = 0; INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE, @@ -42,11 +49,12 @@ return new MIRAddFSDiscriminators(P); } +// TODO(xur): Remove this once we switch to ImprovedFSDiscriminator. // Compute a hash value using debug line number, and the line numbers from the // inline stack. -static uint64_t getCallStackHash(const MachineBasicBlock &BB, - const MachineInstr &MI, - const DILocation *DIL) { +static uint64_t getCallStackHashV0(const MachineBasicBlock &BB, + const MachineInstr &MI, + const DILocation *DIL) { auto updateHash = [](const StringRef &Str) -> uint64_t { if (Str.empty()) return 0; @@ -62,6 +70,20 @@ return Ret; } +static uint64_t getCallStackHash(const DILocation *DIL) { + auto updateHash = [](const StringRef &Str) -> uint64_t { + if (Str.empty()) + return 0; + return MD5Hash(Str); + }; + uint64_t Ret = 0; + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { + Ret ^= updateHash(std::to_string(DIL->getLine())); + Ret ^= updateHash(DIL->getName()); + } + return Ret; +} + // Traverse the CFG and assign FD discriminators. If two instructions // have the same lineno and discriminator, but residing in different BBs, // the latter instruction will get a new discriminator value. 
The new @@ -74,7 +96,8 @@ return false; bool Changed = false; - using LocationDiscriminator = std::tuple; + using LocationDiscriminator = + std::tuple; using BBSet = DenseSet; using LocationDiscriminatorBBMap = DenseMap; using LocationDiscriminatorCurrPassMap = @@ -84,7 +107,12 @@ LocationDiscriminatorCurrPassMap LDCM; // Mask of discriminators before this pass. - unsigned BitMaskBefore = getN1Bits(LowBit); + // TODO(xur): simplify this once we switch to ImprovedFSDiscriminator. + unsigned LowBitTemp = LowBit; + assert(LowBit > 0 && "LowBit in FSDiscriminator cannot be 0"); + if (ImprovedFSDiscriminator) + LowBitTemp -= 1; + unsigned BitMaskBefore = getN1Bits(LowBitTemp); // Mask of discriminators including this pass. unsigned BitMaskNow = getN1Bits(HighBit); // Mask of discriminators for bits specific to this pass. @@ -92,9 +120,28 @@ unsigned NumNewD = 0; LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: " - << MF.getFunction().getName() << "\n"); + << MF.getFunction().getName() << " Highbit=" << HighBit + << "\n"); + + auto BBSize = [](const MachineBasicBlock &BB) { + int Size = 0; + for (const MachineInstr &I : BB) { + if (ImprovedFSDiscriminator && I.isMetaInstruction()) + continue; + Size++; + } + return Size; + }; + for (MachineBasicBlock &BB : MF) { + uint64_t BBSizeHash = 0; + if (ImprovedFSDiscriminator) + BBSizeHash = MD5Hash(std::to_string(BBSize(BB))); + for (MachineInstr &I : BB) { + if (ImprovedFSDiscriminator && I.isMetaInstruction()) { + continue; + } const DILocation *DIL = I.getDebugLoc().get(); if (!DIL) continue; @@ -102,7 +149,12 @@ if (LineNo == 0) continue; unsigned Discriminator = DIL->getDiscriminator(); - LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator}; + uint64_t CallStackHashVal = 0; + if (ImprovedFSDiscriminator) + CallStackHashVal = getCallStackHash(DIL); + + LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator, + CallStackHashVal}; auto &BBMap = LDBM[LD]; auto R = BBMap.insert(&BB); if 
(BBMap.size() == 1) @@ -111,7 +163,10 @@ unsigned DiscriminatorCurrPass; DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD]; DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit; - DiscriminatorCurrPass += getCallStackHash(BB, I, DIL); + if (ImprovedFSDiscriminator) + DiscriminatorCurrPass += BBSizeHash; + else + DiscriminatorCurrPass += getCallStackHashV0(BB, I, DIL); DiscriminatorCurrPass &= BitMaskThisPass; unsigned NewD = Discriminator | DiscriminatorCurrPass; const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD); Index: llvm/lib/CodeGen/MIRSampleProfile.cpp =================================================================== --- llvm/lib/CodeGen/MIRSampleProfile.cpp +++ llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -58,6 +58,7 @@ cl::init(false), cl::desc("View BFI after MIR loader")); +extern cl::opt ImprovedFSDiscriminator; char MIRProfileLoaderPass::ID = 0; INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE, @@ -165,6 +166,11 @@ unsigned HighBit; bool ProfileIsValid = true; + ErrorOr getInstWeight(const MachineInstr &MI) override { + if (ImprovedFSDiscriminator && MI.isMetaInstruction()) + return std::error_code(); + return getInstWeightImpl(MI); + } }; template <> Index: llvm/lib/CodeGen/PseudoProbeInserter.cpp =================================================================== --- llvm/lib/CodeGen/PseudoProbeInserter.cpp +++ llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -128,10 +128,7 @@ private: uint64_t getFuncGUID(Module *M, DILocation *DL) { - auto *SP = DL->getScope()->getSubprogram(); - auto Name = SP->getLinkageName(); - if (Name.empty()) - Name = SP->getName(); + auto Name = DL->getName(); return Function::getGUID(Name); } Index: llvm/lib/Transforms/IPO/SampleProfileProbe.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -56,11 +56,7 @@ while (InlinedAt) { Hash ^= MD5Hash(std::to_string(InlinedAt->getLine())); 
Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn())); - const DISubprogram *SP = InlinedAt->getScope()->getSubprogram(); - // Use linkage name for C++ if possible. - auto Name = SP->getLinkageName(); - if (Name.empty()) - Name = SP->getName(); + auto Name = InlinedAt->getName(); Hash ^= MD5Hash(Name); InlinedAt = InlinedAt->getInlinedAt(); } Index: llvm/test/CodeGen/X86/Inputs/fsloader_v1.afdo =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/Inputs/fsloader_v1.afdo @@ -0,0 +1,35 @@ +work:42380966:1346190 + 1: 1246499 + 5: 1246499 +foo:28798256:4267 + 0: 4267 + 2.1: 255999 + 4: 264627 bar:250018 + 4.1792: 269485 bar:278102 + 4.6656: 280297 bar:280933 + 4.6912: 278916 bar:267752 + 5: 264627 + 5.1792: 269485 + 5.6656: 260670 + 5.6912: 278916 + 6: 11541 + 6.6912: 278916 work:284547 + 6.7168: 260670 work:249428 + 6.7424: 11541 + 7: 272442 + 7.6912: 283590 + 7.7168: 234082 + 7.7424: 279149 + 8: 11541 + 8.14848: 283590 work:305061 + 8.15104: 279149 work:281368 + 8.15360: 234082 work:225786 + 10: 4050 +bar:9504180:1076805 + 2: 1056020 + 3: 1056020 +main:20360:0 + 0: 0 + 2.1: 4045 + 3: 4156 foo:4267 + 5: 0 Index: llvm/test/CodeGen/X86/fsafdo_test1.ll =================================================================== --- llvm/test/CodeGen/X86/fsafdo_test1.ll +++ llvm/test/CodeGen/X86/fsafdo_test1.ll @@ -1,10 +1,13 @@ -; RUN: llc -enable-fs-discriminator < %s | FileCheck %s +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s --check-prefixes=CHECK,V0 +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s --check-prefixes=CHECK,V1 ; ; Check that fs-afdo discriminators are generated.
; CHECK: .loc 1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3 ; ChECK: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5 -; CHECK: .loc 1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5 -; CHECK: .loc 1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3 +; V0: .loc 1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5 +; V0: .loc 1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3 +; V1: .loc 1 9 5 is_stmt 0 discriminator 2818 # foo.c:9:5 +; V1: .loc 1 7 3 is_stmt 1 discriminator 2818 # foo.c:7:3 ; Check that variable __llvm_fs_discriminator__ is generated. ; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ ; CHECK: .section .rodata,"a",@progbits Index: llvm/test/CodeGen/X86/fsafdo_test2.ll =================================================================== --- llvm/test/CodeGen/X86/fsafdo_test2.ll +++ llvm/test/CodeGen/X86/fsafdo_test2.ll @@ -1,7 +1,10 @@ ; REQUIRES: asserts -; RUN: llc -enable-fs-discriminator < %s | FileCheck %s -; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo -; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefix=LOADER +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s --check-prefixes=V0,V01 +; RUN: llvm-profdata merge --sample -profile-isfs -o %t0.afdo %S/Inputs/fsloader.afdo +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false -fs-profile-file=%t0.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=LOADERV0,LOADER +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s --check-prefixes=V1,V01 +; RUN: llvm-profdata merge --sample -profile-isfs -o %t1.afdo %S/Inputs/fsloader_v1.afdo +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -fs-profile-file=%t1.afdo 
-show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=LOADERV1,LOADER ; ;; ;; C source code for the test (compiler at -O3): @@ -41,18 +44,21 @@ ;; } ;; ;; Check that fs-afdo discriminators are generated. -; CHECK: .loc 1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9 -; CHECK: .loc 1 23 9 is_stmt 0 discriminator 3585 # unroll.c:23:9 -; CHECK: .loc 1 23 9 is_stmt 0 discriminator 8705 # unroll.c:23:9 -; CHECK: .loc 1 23 9 is_stmt 0 discriminator 4097 # unroll.c:23:9 +; V01: .loc 1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9 +; V0: .loc 1 23 9 is_stmt 0 discriminator 3585 # unroll.c:23:9 +; V0: .loc 1 23 9 is_stmt 0 discriminator 8705 # unroll.c:23:9 +; V0: .loc 1 23 9 is_stmt 0 discriminator 4097 # unroll.c:23:9 +; V1: .loc 1 23 9 is_stmt 0 discriminator 6913 # unroll.c:23:9 +; V1: .loc 1 23 9 is_stmt 0 discriminator 7169 # unroll.c:23:9 +; V1: .loc 1 23 9 is_stmt 0 discriminator 7425 # unroll.c:23:9 ;; ;; Check that variable __llvm_fs_discriminator__ is generated. -; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ -; CHECK: .section .rodata,"a",@progbits -; CHECK: .weak __llvm_fs_discriminator__ -; CHECK: __llvm_fs_discriminator__: -; CHECK: .byte 1 -; CHECK: .size __llvm_fs_discriminator__, 1 +; V01: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ +; V01: .section .rodata,"a",@progbits +; V01: .weak __llvm_fs_discriminator__ +; V01: __llvm_fs_discriminator__: +; V01: .byte 1 +; V01: .size __llvm_fs_discriminator__, 1 ;; Check that new branch probs are generated. 
; LOADER: Set branch fs prob: MBB (1 -> 3): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% @@ -63,16 +69,19 @@ ; LOADER: Set branch fs prob: MBB (5 -> 7): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7de3eed2 / 0x80000000 = 98.35% ; LOADER: Set branch fs prob: MBB (8 -> 10): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x00000000 / 0x80000000 = 0.00% ; LOADER: Set branch fs prob: MBB (8 -> 9): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x80000000 / 0x80000000 = 100.00% -; LOADER: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% -; LOADER: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07% +; LOADERV0: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% +; LOADERV1: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08% +; LOADERV0: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07% +; LOADERV1: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92% ; LOADER: Set branch fs prob: MBB (12 -> 14): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x02012507 / 0x80000000 = 1.57% ; LOADER: Set branch fs prob: MBB (12 -> 13): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x7dfedaf9 / 0x80000000 = 98.43% -; LOADER: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08% -; LOADER: Set branch fs prob: MBB 
(14 -> 15): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92% +; LOADERV0: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08% +; LOADERV1: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% +; LOADERV0: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92% +; LOADERV1: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07% ; LOADER: Set branch fs prob: MBB (16 -> 18): unroll.c:24:11-->unroll.c:19:3 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x16588166 / 0x80000000 = 17.46% ; LOADER: Set branch fs prob: MBB (16 -> 17): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x69a77e9a / 0x80000000 = 82.54% - target triple = "x86_64-unknown-linux-gnu" @sum = dso_local local_unnamed_addr global i32 0, align 4 Index: llvm/test/CodeGen/X86/fsafdo_test3.ll =================================================================== --- llvm/test/CodeGen/X86/fsafdo_test3.ll +++ llvm/test/CodeGen/X86/fsafdo_test3.ll @@ -1,5 +1,7 @@ -; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo -; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false -print-machine-bfi -print-bfi-func-name=foo -print-before=fs-profile-loader -stop-after=fs-profile-loader < %s 2>&1 | FileCheck %s --check-prefix=BFI +; RUN: llvm-profdata merge --sample -profile-isfs -o %t0.afdo %S/Inputs/fsloader.afdo +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false -fs-profile-file=%t0.afdo -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false -print-machine-bfi -print-bfi-func-name=foo 
-print-before=fs-profile-loader -stop-after=fs-profile-loader < %s 2>&1 | FileCheck %s --check-prefixes=BFI,BFIV0 +; RUN: llvm-profdata merge --sample -profile-isfs -o %t1.afdo %S/Inputs/fsloader_v1.afdo +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -fs-profile-file=%t1.afdo -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false -print-machine-bfi -print-bfi-func-name=foo -print-before=fs-profile-loader -stop-after=fs-profile-loader < %s 2>&1 | FileCheck %s --check-prefixes=BFI,BFIV1 ; ;; ;; C source code for the test (compiler at -O3): @@ -63,7 +65,7 @@ ; ; BFI: # *** IR Dump Before SampleFDO loader in MIR (fs-profile-loader) ***: ; BFI: # End machine code for function foo. -; +; BFI-EMPTY: ; BFI: block-frequency-info: foo ; BFI: - BB0[entry]: float = 1.0, int = 8, count = 4268 ; BFI: - BB1[for.cond1.preheader]: float = 66.446, int = 531, count = 283289 @@ -75,11 +77,13 @@ ; BFI: - BB7[if.end.1]: float = 66.446, int = 531, count = 283289 ; BFI: - BB8[if.then7.1]: float = 66.446, int = 531, count = 283289 ; BFI: - BB9[if.end9.1]: float = 66.446, int = 531, count = 283289 -; BFI: - BB10[if.then.2]: float = 2.7041, int = 21, count = 11204 +; BFIV0: - BB10[if.then.2]: float = 2.7041, int = 21, count = 11204 +; BFIV1: - BB10[if.then.2]: float = 61.075, int = 488, count = 260348 ; BFI: - BB11[if.end.2]: float = 66.446, int = 531, count = 283289 ; BFI: - BB12[if.then7.2]: float = 65.405, int = 523, count = 279021 ; BFI: - BB13[if.end9.2]: float = 66.446, int = 531, count = 283289 -; BFI: - BB14[if.then.3]: float = 61.075, int = 488, count = 260348 +; BFIV0: - BB14[if.then.3]: float = 61.075, int = 488, count = 260348 +; BFIV1: - BB14[if.then.3]: float = 2.7041, int = 21, count = 11204 ; BFI: - BB15[if.end.3]: float = 66.446, int = 531, count = 283289 ; BFI: - BB16[if.then7.3]: float = 54.846, int = 438, count = 233673 ; BFI: - BB17[if.end9.3]: float = 66.446, int = 531, count = 283289 Index: llvm/test/CodeGen/X86/fsafdo_test4.ll 
=================================================================== --- llvm/test/CodeGen/X86/fsafdo_test4.ll +++ llvm/test/CodeGen/X86/fsafdo_test4.ll @@ -1,10 +1,11 @@ -; RUN: llc -enable-fs-discriminator < %s | FileCheck %s +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s ; ; Check that fs-afdo discriminators are NOT generated, as debugInfoForProfiling is false (not set). ; CHECK: .loc 1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3 ; CHECK: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5 -; CHECK-NOT: .loc 1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5 -; CHECK-NOT: .loc 1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3 +; CHECK-NOT: .loc 1 9 5 is_stmt 0 discriminator +; CHECK-NOT: .loc 1 7 3 is_stmt 1 discriminator ; Check that variable __llvm_fs_discriminator__ is NOT generated. ; CHECK-NOT: __llvm_fs_discriminator__: