Index: llvm/include/llvm/CodeGen/FSAFDODiscriminator.h =================================================================== --- /dev/null +++ llvm/include/llvm/CodeGen/FSAFDODiscriminator.h @@ -0,0 +1,73 @@ +//===----- FSAFDODiscriminator.h: FS Discriminator Support ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the supporting functions for adding Flow Sensitive +// discriminators to the instruction debug information. With this, a cloned +// machine instruction in a different MachineBasicBlock will have its own +// discriminator value. This is done in an AddFSDiscriminators pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_FSAFDODISCRIMINATOR_H +#define LLVM_CODEGEN_FSAFDODISCRIMINATOR_H + +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/ProfileData/SampleProfReader.h" + +#include <cassert> + +namespace llvm { + +class AddFSDiscriminators : public MachineFunctionPass { + MachineFunction
*MF = nullptr; + unsigned LowBit; + unsigned HighBit; + +public: + static char ID; + /// FS bits that will be used in this pass (numbers are 0 based and + /// inclusive). LowBit must be < HighBit; the (0, 0) defaults fail the assert. + AddFSDiscriminators(unsigned LowBit = 0, unsigned HighBit = 0) + : MachineFunctionPass(ID), LowBit(LowBit), HighBit(HighBit) { + assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); + } + + /// getNumFSBBs() - Return the number of machine BBs that have FS samples. + unsigned getNumFSBBs(); + + /// getNumFSSamples() - Return the number of samples that have flow sensitive + /// values. + uint64_t getNumFSSamples(); + + /// getMachineFunction - Return the current machine function (null if unset). + const MachineFunction *getMachineFunction() const { return MF; } + +private: + bool runOnMachineFunction(MachineFunction &) override; +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_FSAFDODISCRIMINATOR_H Index: llvm/include/llvm/CodeGen/Passes.h =================================================================== --- llvm/include/llvm/CodeGen/Passes.h +++ llvm/include/llvm/CodeGen/Passes.h @@ -165,6 +165,9 @@ /// This pass perform post-ra machine sink for COPY instructions. extern char &PostRAMachineSinkingID; + /// This pass adds flow sensitive discriminators. + extern char &AddFSDiscriminatorsID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// @@ -487,6 +490,10 @@ /// Create IR Type Promotion pass. \see TypePromotion.cpp FunctionPass *createTypePromotionPass(); + /// Add Flow Sensitive Discriminators. + FunctionPass *createAddFSDiscriminatorsPass(unsigned LowBit, + unsigned HighBit); + /// Creates MIR Debugify pass.
\see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); Index: llvm/include/llvm/IR/DebugInfoMetadata.h =================================================================== --- llvm/include/llvm/IR/DebugInfoMetadata.h +++ llvm/include/llvm/IR/DebugInfoMetadata.h @@ -26,6 +26,8 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FSAFDODiscriminator.h" #include #include #include @@ -60,6 +62,8 @@ namespace llvm { +extern cl::opt<bool> EnableFSDiscriminator; + class DITypeRefArray { const MDTuple *N = nullptr; @@ -1762,8 +1766,22 @@ static const DILocation *getMergedLocations(ArrayRef Locs); + /// Return the masked discriminator value for an input discriminator value D + /// (i.e. zero out the bits above bit B of D; B is 0-based and inclusive). + /// Example: an input of (0x1FF, 7) returns 0xFF. B == 0 leaves D unmasked. + static unsigned getMaskedDiscriminator(unsigned D, unsigned B) { + if (B == 0) + return D; + return (D & getN1Bits(B)); + } + + /// Return the highest bit (0-based) used for base discriminators. + static unsigned getBaseDiscriminatorBits() { return BASE_DIS_BIT_END; } + /// Returns the base discriminator for a given encoded discriminator \p D. static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) { + if (EnableFSDiscriminator) + return getMaskedDiscriminator(D, getBaseDiscriminatorBits()); return getUnsignedFromPrefixEncoding(D); } @@ -1785,6 +1803,8 @@ /// Returns the duplication factor for a given encoded discriminator \p D, or /// 1 if no value or 0 is encoded.
static unsigned getDuplicationFactorFromDiscriminator(unsigned D) { + if (EnableFSDiscriminator) + return 1; D = getNextComponentInDiscriminator(D); unsigned Ret = getUnsignedFromPrefixEncoding(D); if (Ret == 0) @@ -2226,9 +2246,14 @@ Optional DILocation::cloneWithBaseDiscriminator(unsigned D) const { unsigned BD, DF, CI; - decodeDiscriminator(getDiscriminator(), BD, DF, CI); + if (EnableFSDiscriminator) + BD = getBaseDiscriminator(); + else + decodeDiscriminator(getDiscriminator(), BD, DF, CI); if (D == BD) return this; + if (EnableFSDiscriminator) + return cloneWithDiscriminator(D); if (Optional Encoded = encodeDiscriminator(D, DF, CI)) return cloneWithDiscriminator(*Encoded); return None; Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -64,6 +64,7 @@ void initializeAAResultsWrapperPassPass(PassRegistry&); void initializeADCELegacyPassPass(PassRegistry&); void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&); +void initializeAddFSDiscriminatorsPass(PassRegistry&); void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &); void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &); void initializeAddressSanitizerLegacyPassPass(PassRegistry &); Index: llvm/include/llvm/LTO/Config.h =================================================================== --- llvm/include/llvm/LTO/Config.h +++ llvm/include/llvm/LTO/Config.h @@ -171,6 +171,9 @@ bool ShouldDiscardValueNames = true; DiagnosticHandlerFunction DiagHandler; + /// Add FSAFDO discriminators. + bool AddFSDiscriminator = false; + /// If this field is set, LTO will write input file paths and symbol /// resolutions here in llvm-lto2 command line flag format. 
This can be /// used for testing and for running the LTO pipeline outside of the linker Index: llvm/include/llvm/ProfileData/SampleProf.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProf.h +++ llvm/include/llvm/ProfileData/SampleProf.h @@ -190,7 +190,9 @@ SecFlagPartial = (1 << 0), /// SecFlagContext means this is context-sensitive profile for /// CSSPGO - SecFlagFullContext = (1 << 1) + SecFlagFullContext = (1 << 1), + /// SecFlagFSDiscriminator means this profile uses flow-sensitive discriminators. + SecFlagFSDiscriminator = (1 << 2) }; enum class SecFuncMetadataFlags : uint32_t { @@ -891,6 +893,9 @@ /// Whether the profile contains any ".__uniq." suffix in a name. static bool HasUniqSuffix; + /// If this profile uses flow sensitive discriminators. + static bool ProfileIsFS; + /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for /// all the function symbols defined or declared in current module. DenseMap *GUIDToFuncNameMap = nullptr; Index: llvm/include/llvm/ProfileData/SampleProfReader.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfReader.h +++ llvm/include/llvm/ProfileData/SampleProfReader.h @@ -237,6 +237,7 @@ #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FSAFDODiscriminator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SymbolRemappingReader.h" #include @@ -345,6 +346,13 @@ SampleProfileFormat Format = SPF_None) : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {} + void setDiscriminatorMaskedBitFrom(uint32_t B) { MaskedBitFrom = B; } + + inline uint32_t getDiscriminatorMask() const { + assert((MaskedBitFrom != 0) && "MaskedBitFrom is not set properly"); + return getN1Bits(MaskedBitFrom); + } + virtual ~SampleProfileReader() = default; /// Read and validate the file header. 
@@ -505,6 +513,9 @@ /// Number of context-sensitive profiles. uint32_t CSProfileCount = 0; + /// Whether function profiles use flow sensitive discriminators. + bool ProfileIsFS = false; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; @@ -512,6 +523,10 @@ /// is used by compiler. If SampleProfileReader is used by other /// tools which are not compiler, M is usually nullptr. const Module *M = nullptr; + + /// The samples in this class zeros out the discriminator bits higher + /// than bit MaskedBitFrom (0 based). The default is to keep all the bits. + unsigned MaskedBitFrom = 31; }; class SampleProfileReaderText : public SampleProfileReader { Index: llvm/include/llvm/Support/FSAFDODiscriminator.h =================================================================== --- /dev/null +++ llvm/include/llvm/Support/FSAFDODiscriminator.h @@ -0,0 +1,104 @@ +//===- llvm/Support/FSAFDODiscriminator.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the constants and utility functions used by the FSAFDO +// passes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_FSAFDODISCRIMINATOR_H +#define LLVM_SUPPORT_FSAFDODISCRIMINATOR_H + +#define BASE_DIS_BIT_BEG 0 +#define BASE_DIS_BIT_END 7 + +#define PASS_1_DIS_BIT_BEG 8 +#define PASS_1_DIS_BIT_END 13 + +#define PASS_2_DIS_BIT_BEG 14 +#define PASS_2_DIS_BIT_END 19 + +#define PASS_3_DIS_BIT_BEG 20 +#define PASS_3_DIS_BIT_END 25 + +#define PASS_LAST_DIS_BIT_BEG 26 +#define PASS_LAST_DIS_BIT_END 31 + +// Set bits range [0 .. n] to 1. 
+static inline unsigned getN1Bits(int N) { + if (N >= 31) + return 0xFFFFFFFF; + return (1U << (N + 1)) - 1; +} + +#ifndef NDEBUG +// Given a discriminator value DiscriminatorVal, return the highest-numbered +// bucket that has a bit set in it (0 if the value is 0). +inline static unsigned getFSBucket(unsigned int DiscriminatorVal) { + unsigned int N = DiscriminatorVal; + if (N == 0) + return 0; + if (N & + (getN1Bits(PASS_LAST_DIS_BIT_BEG - 1) ^ getN1Bits(PASS_LAST_DIS_BIT_END))) + return 5; + if (N & (getN1Bits(PASS_3_DIS_BIT_BEG - 1) ^ getN1Bits(PASS_3_DIS_BIT_END))) + return 4; + if (N & (getN1Bits(PASS_2_DIS_BIT_BEG - 1) ^ getN1Bits(PASS_2_DIS_BIT_END))) + return 3; + if (N & (getN1Bits(PASS_1_DIS_BIT_BEG - 1) ^ getN1Bits(PASS_1_DIS_BIT_END))) + return 2; + return 1; +} + +inline unsigned getFSBucketVal(int LowBit, int HighBit, unsigned N) { + unsigned int V = N >> LowBit; + return (V & getN1Bits(HighBit - LowBit)); +} + +inline unsigned getFSBucketVal(int B, unsigned N) { + switch (B) { + case 1: + return getFSBucketVal(BASE_DIS_BIT_BEG, BASE_DIS_BIT_END, N); + case 2: + return getFSBucketVal(PASS_1_DIS_BIT_BEG, PASS_1_DIS_BIT_END, N); + case 3: + return getFSBucketVal(PASS_2_DIS_BIT_BEG, PASS_2_DIS_BIT_END, N); + case 4: + return getFSBucketVal(PASS_3_DIS_BIT_BEG, PASS_3_DIS_BIT_END, N); + case 5: + return getFSBucketVal(PASS_LAST_DIS_BIT_BEG, PASS_LAST_DIS_BIT_END, N); + default: + llvm_unreachable("Wrong FSBucket Number"); + } +} + +inline void setFSBucketVal(int LowBit, int HighBit, unsigned Val, unsigned &N) { + unsigned int V = Val & getN1Bits(HighBit - LowBit); + V = V << LowBit; + N |= V; +} + +inline void setFSBucketVal(int B, unsigned Val, unsigned &N) { + switch (B) { + case 1: + return setFSBucketVal(BASE_DIS_BIT_BEG, BASE_DIS_BIT_END, Val, N); + case 2: + return setFSBucketVal(PASS_1_DIS_BIT_BEG, PASS_1_DIS_BIT_END, Val, N); + case 3: + return setFSBucketVal(PASS_2_DIS_BIT_BEG, PASS_2_DIS_BIT_END, Val, N); + case 4: + return setFSBucketVal(PASS_3_DIS_BIT_BEG, PASS_3_DIS_BIT_END, Val, N); +
case 5: + return setFSBucketVal(PASS_LAST_DIS_BIT_BEG, PASS_LAST_DIS_BIT_END, Val, N); + default: + llvm_unreachable("Wrong FSBucket Number"); + } +} +#endif + +#endif /* LLVM_SUPPORT_FSAFDODISCRIMINATOR_H */ Index: llvm/lib/CodeGen/CMakeLists.txt =================================================================== --- llvm/lib/CodeGen/CMakeLists.txt +++ llvm/lib/CodeGen/CMakeLists.txt @@ -35,6 +35,7 @@ FinalizeISel.cpp FixupStatepointCallerSaved.cpp FuncletLayout.cpp + FSAFDODiscriminator.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCRootLowering.cpp Index: llvm/lib/CodeGen/FSAFDODiscriminator.cpp =================================================================== --- /dev/null +++ llvm/lib/CodeGen/FSAFDODiscriminator.cpp @@ -0,0 +1,137 @@ +//===-------- FSAFDODiscriminator.cpp: Flow Sensitive Discriminator--------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of a machine pass that adds the flow +// sensitive discriminator to the instruction debug information.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/FSAFDODiscriminator.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using ProfileCount = Function::ProfileCount; + +#define DEBUG_TYPE "fs-discriminators" + +char AddFSDiscriminators::ID = 0; + +INITIALIZE_PASS(AddFSDiscriminators, DEBUG_TYPE, + "Add Flow Sensitive Discriminators", + /* cfg = */ false, /* is_analysis = */ false) + +char &llvm::AddFSDiscriminatorsID = AddFSDiscriminators::ID; + +FunctionPass *llvm::createAddFSDiscriminatorsPass(unsigned LowBit, + unsigned HighBit) { + return new AddFSDiscriminators(LowBit, HighBit); +} + +// Compute a hash value using the basic block name plus the line numbers and +// function linkage names of the location and of its inline stack. +static uint64_t getCallStackHash(const MachineBasicBlock &BB, + const MachineInstr &MI, + const DILocation *DIL) { + uint64_t Ret = MD5Hash(std::to_string(DIL->getLine())); + Ret ^= MD5Hash(BB.getName()); + Ret ^= MD5Hash(DIL->getScope()->getSubprogram()->getLinkageName()); + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { + Ret ^= MD5Hash(std::to_string(DIL->getLine())); + Ret ^= MD5Hash(DIL->getScope()->getSubprogram()->getLinkageName()); + } + return Ret; +} + +bool AddFSDiscriminators::runOnMachineFunction(MachineFunction &mf) { + if (!EnableFSDiscriminator) + return false; + + bool Changed = false; + using Location = std::pair<StringRef, unsigned>; + using LocationDiscriminator = std::pair<Location, unsigned>; + using BBSet = DenseSet<const MachineBasicBlock *>; + using LocationDiscriminatorBBMap = DenseMap<LocationDiscriminator, BBSet>; + using LocationDiscriminatorCurrPassMap = + DenseMap<LocationDiscriminator, unsigned>; + + MF = &mf; + LocationDiscriminatorBBMap LDBM; + LocationDiscriminatorCurrPassMap LDCM; + + // Mask of discriminators before this pass.
+ unsigned BitMaskBefore = getN1Bits(LowBit); + // Mask of discriminators including this pass. + unsigned BitMaskNow = getN1Bits(HighBit); + // Mask of discriminators for bits specific to this pass. + unsigned BitMaskThisPass = BitMaskNow ^ BitMaskBefore; + unsigned NumNewD = 0; + + LLVM_DEBUG(dbgs() << "AddFSDiscriminators working on Func: " + << MF->getFunction().getName() << "\n"); + for (MachineBasicBlock &BB : *MF) { + for (MachineInstr &I : BB) { + const DILocation *DIL = I.getDebugLoc().get(); + if (!DIL) + continue; + unsigned LineNo = DIL->getLine(); + if (LineNo == 0) + continue; + Location L = std::make_pair(DIL->getFilename(), LineNo); + unsigned Discriminator = DIL->getDiscriminator(); + Discriminator &= BitMaskBefore; + LocationDiscriminator LD = std::make_pair(L, Discriminator); + auto &BBMap = LDBM[LD]; + auto R = BBMap.insert(&BB); + if (BBMap.size() == 1) + continue; + unsigned DiscriminatorCurrPass; + + DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD]; + DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit; + DiscriminatorCurrPass += getCallStackHash(BB, I, DIL); + DiscriminatorCurrPass &= BitMaskThisPass; + unsigned NewD = Discriminator | DiscriminatorCurrPass; + auto NewDIL = DIL->cloneWithDiscriminator(NewD); + if (!NewDIL) { + LLVM_DEBUG(dbgs() << "Could not encode discriminator: " + << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" << Discriminator << " " + << I << "\n"); + } else { + I.setDebugLoc(NewDIL); + NumNewD++; + Changed = true; // Only report a change when a location was rewritten. + LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ": add FS discriminator, from " + << Discriminator << " -> " << NewD << "\n"); + } + } + } + + if (Changed) { + Module *M = MF->getFunction().getParent(); + const std::string FSDiscriminatorVar = "__llvm_fs_discriminator__"; + if (!M->getGlobalVariable(FSDiscriminatorVar)) { + auto &Context = M->getContext(); + // Create a global variable to flag that FSDiscriminators
are used. + new GlobalVariable(*M, Type::getInt1Ty(Context), true, + GlobalValue::WeakAnyLinkage, + ConstantInt::getTrue(Context), FSDiscriminatorVar); + } + + LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n"); + } + + return Changed; +} Index: llvm/lib/CodeGen/TargetPassConfig.cpp =================================================================== --- llvm/lib/CodeGen/TargetPassConfig.cpp +++ llvm/lib/CodeGen/TargetPassConfig.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FSAFDODiscriminator.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/Threading.h" #include "llvm/Target/CGPassBuilderOption.h" @@ -165,6 +166,12 @@ clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2", "Disable the abort but emit a diagnostic on failure"))); +// An option that disables inserting FS-AFDO discriminators before emit. +static cl::opt<bool> + FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden, + cl::desc("Do not insert FS-AFDO discrimnators before " + "emit.")); + // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). @@ -334,6 +341,8 @@ namespace llvm { +extern cl::opt EnableFSDiscriminator; + class PassConfigImpl { public: // List of passes explicitly substituted by this target. Normally this is
Normally this is @@ -1167,6 +1176,10 @@ addPass(&XRayInstrumentationID); addPass(&PatchableFunctionID); + if (EnableFSDiscriminator && !FSNoFinalDiscrim) + addPass(createAddFSDiscriminatorsPass(PASS_LAST_DIS_BIT_BEG, + PASS_LAST_DIS_BIT_END)); + addPreEmitPass(); if (TM->Options.EnableIPRA) Index: llvm/lib/LTO/LTOBackend.cpp =================================================================== --- llvm/lib/LTO/LTOBackend.cpp +++ llvm/lib/LTO/LTOBackend.cpp @@ -215,10 +215,15 @@ PGOOptions::SampleUse, PGOOptions::NoCSAction, true); else if (Conf.RunCSIRInstr) { PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, - PGOOptions::IRUse, PGOOptions::CSIRInstr); + PGOOptions::IRUse, PGOOptions::CSIRInstr, + Conf.AddFSDiscriminator); } else if (!Conf.CSIRProfile.empty()) { PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, - PGOOptions::IRUse, PGOOptions::CSIRUse); + PGOOptions::IRUse, PGOOptions::CSIRUse, + Conf.AddFSDiscriminator); + } else if (Conf.AddFSDiscriminator) { + PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOptions::NoCSAction, true); } LoopAnalysisManager LAM; Index: llvm/lib/ProfileData/SampleProf.cpp =================================================================== --- llvm/lib/ProfileData/SampleProf.cpp +++ llvm/lib/ProfileData/SampleProf.cpp @@ -36,12 +36,19 @@ "will be used. This is very useful for performance debugging")); namespace llvm { + +// Use FS-AFDO discriminator. 
+cl::opt EnableFSDiscriminator( + "enable-fs-discriminator", cl::Hidden, cl::init(false), + cl::desc("Enable adding flow sensitive discriminators")); + namespace sampleprof { SampleProfileFormat FunctionSamples::Format; bool FunctionSamples::ProfileIsProbeBased = false; bool FunctionSamples::ProfileIsCS = false; bool FunctionSamples::UseMD5 = false; bool FunctionSamples::HasUniqSuffix = true; +bool FunctionSamples::ProfileIsFS = false; } // namespace sampleprof } // namespace llvm @@ -232,9 +239,15 @@ const DILocation *PrevDIL = DIL; for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { - S.push_back(std::make_pair( - LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), - PrevDIL->getScope()->getSubprogram()->getLinkageName())); + unsigned Discriminator; + if (ProfileIsFS) + Discriminator = DIL->getDiscriminator(); + else + Discriminator = DIL->getBaseDiscriminator(); + + S.push_back( + std::make_pair(LineLocation(getOffset(DIL), Discriminator), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); PrevDIL = DIL; } if (S.size() == 0) Index: llvm/lib/ProfileData/SampleProfReader.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfReader.cpp +++ llvm/lib/ProfileData/SampleProfReader.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" @@ -45,6 +46,15 @@ using namespace llvm; using namespace sampleprof; +#define DEBUG_TYPE "samplepgo-reader" + +// This internal option specifies if the profile uses FS discriminators. +// It only applies to text, binary and compact binary format profiles. +// For ext-binary format profiles, the flag is set in the summary. 
+static cl::opt ProfileIsFSDisciminator( + "profile-isfs", cl::Hidden, cl::init(false), + cl::desc("Profile uses flow senstive discriminators")); + /// Dump the function profile for \p FName. /// /// \param FName Name of the function to print. @@ -238,6 +248,16 @@ // top-level function profile. bool SeenMetadata = false; +#ifndef NDEBUG + uint64_t FSBucketSamples[6]; + uint32_t FSBucketRecords[6]; + for (int i = 0; i < 6; i++) { + FSBucketSamples[i] = 0; + FSBucketRecords[i] = 0; + } +#endif + + ProfileIsFS = ProfileIsFSDisciminator; for (; !LineIt.is_at_eof(); ++LineIt) { if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') continue; @@ -295,6 +315,19 @@ "Found non-metadata after metadata: " + *LineIt); return sampleprof_error::malformed; } + + // Here we handle FS discriminators. + if (ProfileIsFS) { + uint32_t MaskedDiscriminator = Discriminator; + MaskedDiscriminator &= getDiscriminatorMask(); +#ifndef NDEBUG + int Bucket = getFSBucket(Discriminator); + FSBucketRecords[Bucket] += 1; + FSBucketSamples[Bucket] += NumSamples; +#endif + Discriminator = MaskedDiscriminator; + } + while (InlineStack.size() > Depth) { InlineStack.pop_back(); } @@ -348,6 +381,17 @@ if (Result == sampleprof_error::success) computeSummary(); +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "Text reader is done. 
Statistics:\n"); + for (int i = 0; i < 6; i++) { + if (FSBucketRecords[i] == 0) + continue; + LLVM_DEBUG(dbgs() << "Bucket " << i << ": " + << "records=" << FSBucketRecords[i] + << " samples=" << FSBucketSamples[i] << "\n"); + } +#endif + return Result; } @@ -504,6 +548,13 @@ if (std::error_code EC = NumCalls.getError()) return EC; + // Here we handle FS discriminators: + uint32_t DiscriminatorVal = *Discriminator; + if (ProfileIsFS) { + uint32_t MaskedDiscriminator = DiscriminatorVal & getDiscriminatorMask(); + DiscriminatorVal = MaskedDiscriminator; + } + for (uint32_t J = 0; J < *NumCalls; ++J) { auto CalledFunction(readStringFromTable()); if (std::error_code EC = CalledFunction.getError()) @@ -513,11 +564,11 @@ if (std::error_code EC = CalledFunctionSamples.getError()) return EC; - FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, *CalledFunction, *CalledFunctionSamples); } - FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); } // Read all the samples for inlined function calls. 
@@ -538,8 +589,15 @@ if (std::error_code EC = FName.getError()) return EC; + // Here we handle FS discriminators: + uint32_t DiscriminatorVal = *Discriminator; + if (ProfileIsFS) { + uint32_t MaskedDiscriminator = DiscriminatorVal & getDiscriminatorMask(); + DiscriminatorVal = MaskedDiscriminator; + } + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( - LineLocation(*LineOffset, *Discriminator))[std::string(*FName)]; + LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)]; CalleeProfile.setName(*FName); if (std::error_code EC = readProfile(CalleeProfile)) return EC; @@ -575,6 +633,7 @@ } std::error_code SampleProfileReaderBinary::readImpl() { + ProfileIsFS = ProfileIsFSDisciminator; while (!at_eof()) { if (std::error_code EC = readFuncProfile(Data)) return EC; @@ -595,6 +654,8 @@ Summary->setPartialProfile(true); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) FunctionSamples::ProfileIsCS = ProfileIsCS = true; + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) + FunctionSamples::ProfileIsFS = ProfileIsFS = true; break; case SecNameTable: { FixedLengthMD5 = @@ -861,6 +922,8 @@ // given a module. bool LoadFuncsToBeUsed = collectFuncsFromModule(); + ProfileIsFS = ProfileIsFSDisciminator; + std::vector OffsetsToUse; if (!LoadFuncsToBeUsed) { // load all the function profiles. @@ -1105,6 +1168,8 @@ Flags.append("partial,"); if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) Flags.append("context,"); + if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) + Flags.append("fs-discriminator,"); break; default: break; @@ -1521,6 +1586,9 @@ /// This format is generated by the Linux Perf conversion tool at /// https://github.com/google/autofdo. std::error_code SampleProfileReaderGCC::readImpl() { + + assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator"); + // Read the string table. 
if (std::error_code EC = readNameTable()) return EC; Index: llvm/lib/ProfileData/SampleProfWriter.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfWriter.cpp +++ llvm/lib/ProfileData/SampleProfWriter.cpp @@ -252,6 +252,8 @@ addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext); if (Type == SecFuncMetadata && FunctionSamples::ProfileIsCS) addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute); + if (Type == SecProfSummary && FunctionSamples::ProfileIsFS) + addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator); uint64_t SectionStart = markSectionStart(Type, LayoutIdx); switch (Type) { Index: llvm/lib/Target/X86/X86InsertPrefetch.cpp =================================================================== --- llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -167,6 +167,7 @@ return false; } Reader = std::move(ReaderOrErr.get()); + Reader->setDiscriminatorMaskedBitFrom(DILocation::getBaseDiscriminatorBits()); Reader->read(); return true; } Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1729,6 +1729,7 @@ // set module before reading the profile so reader may be able to only // read the function profiles which are used by the current module. 
Reader->setModule(&M); + Reader->setDiscriminatorMaskedBitFrom(DILocation::getBaseDiscriminatorBits()); if (std::error_code EC = Reader->read()) { std::string Msg = "profile reading failed: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -576,7 +576,7 @@ for (Loop *SubLoop : *L) LoopsToSimplify.insert(SubLoop); - if (Header->getParent()->isDebugInfoForProfiling()) + if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) if (!isa(&I)) Index: llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -349,7 +349,7 @@ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); - if (Header->getParent()->isDebugInfoForProfiling()) + if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) if (!isa(&I)) Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1048,7 +1048,7 @@ if (const Instruction *Inst = dyn_cast_or_null(Ptr)) { const DILocation *DIL = Inst->getDebugLoc(); if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && - !isa(Inst)) { + !isa(Inst) && !EnableFSDiscriminator) { assert(!VF.isScalable() && "scalable vectors not yet supported."); auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); @@ -1058,8 +1058,7 @@ LLVM_DEBUG(dbgs() << "Failed 
to create new discriminator: " << DIL->getFilename() << " Line: " << DIL->getLine()); - } - else + } else B.SetCurrentDebugLocation(DIL); } else B.SetCurrentDebugLocation(DebugLoc()); Index: llvm/test/CodeGen/X86/fsafdo_test1.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/fsafdo_test1.ll @@ -0,0 +1,60 @@ +; RUN: llc -enable-fs-discriminator < %s | FileCheck %s +; +; Check that fs-afdo discriminators are generated. +; CHECK: .loc 1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3 +; Check: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5 +; CHECK: .loc 1 9 5 is_stmt 0 discriminator 3623878658 # foo.c:9:5 +; CHECK: .loc 1 7 3 is_stmt 1 discriminator 805306370 # foo.c:7:3 +; Check that variable __llvm_fs_discriminator__ is generated. +; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ +; CHECK: .section .rodata,"a",@progbits +; CHECK: .weak __llvm_fs_discriminator__ +; CHECK: __llvm_fs_discriminator__: +; CHECK: .byte 1 +; CHECK: .size __llvm_fs_discriminator__, 1 + +target triple = "x86_64-unknown-linux-gnu" + +%struct.Node = type { %struct.Node* } + +define i32 @foo(%struct.Node* readonly %node, %struct.Node* readnone %root) !dbg !6 { +entry: + %cmp = icmp eq %struct.Node* %node, %root, !dbg !8 + br i1 %cmp, label %while.end4, label %while.cond1.preheader.lr.ph, !dbg !10 + +while.cond1.preheader.lr.ph: + %tobool = icmp eq %struct.Node* %node, null + br i1 %tobool, label %while.cond1.preheader.us.preheader, label %while.body2.preheader, !dbg !11 + +while.body2.preheader: + br label %while.body2, !dbg !11 + +while.cond1.preheader.us.preheader: + br label %while.cond1.preheader.us, !dbg !10 + +while.cond1.preheader.us: + br label %while.cond1.preheader.us, !dbg !10 + +while.body2: + br label %while.body2, !dbg !11 + +while.end4: + ret i32 0, !dbg !12 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: 
true, emissionKind: LineTablesOnly) +!1 = !DIFile(filename: "foo.c", directory: "b/") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{} +!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 5, type: !7, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 7, column: 15, scope: !9) +!9 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 2) +!10 = !DILocation(line: 7, column: 3, scope: !9) +!11 = !DILocation(line: 9, column: 5, scope: !9) +!12 = !DILocation(line: 14, column: 3, scope: !6) Index: llvm/test/CodeGen/X86/fsafdo_test2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/fsafdo_test2.ll @@ -0,0 +1,196 @@ +; RUN: llc -enable-fs-discriminator < %s | FileCheck %s +; +; Check that fs-afdo discriminators are generated. +; CHECK: .loc 1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9 +; CHECK: .loc 1 23 9 is_stmt 0 discriminator 1073741825 # unroll.c:23:9 +; CHECK: .loc 1 23 9 is_stmt 0 discriminator 2147483649 # unroll.c:23:9 +; CHECK: .loc 1 23 9 is_stmt 0 discriminator 268435457 # unroll.c:23:9 +; Check that variable __llvm_fs_discriminator__ is generated. 
+; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ +; CHECK: .section .rodata,"a",@progbits +; CHECK: .weak __llvm_fs_discriminator__ +; CHECK: __llvm_fs_discriminator__: +; CHECK: .byte 1 +; CHECK: .size __llvm_fs_discriminator__, 1 + +target triple = "x86_64-unknown-linux-gnu" + +@sum = dso_local local_unnamed_addr global i32 0, align 4 + +declare i32 @bar(i32 %i) #0 +declare void @work(i32 %i) #2 + +define dso_local void @foo() #0 !dbg !29 { +entry: + br label %for.cond1.preheader, !dbg !30 + +for.cond1.preheader: + %j.012 = phi i32 [ 0, %entry ], [ %inc11, %if.end9.3 ] + %mul = mul nuw nsw i32 %j.012, 48 + %call = tail call i32 @bar(i32 %mul), !dbg !32 + %0 = and i32 %call, 1, !dbg !33 + %tobool.not = icmp eq i32 %0, 0, !dbg !33 + br i1 %tobool.not, label %if.end, label %if.then, !dbg !35 + +if.then: + %mul4 = shl nsw i32 %call, 1, !dbg !36 + tail call void @work(i32 %mul4), !dbg !37 + br label %if.end, !dbg !38 + +if.end: + %1 = and i32 %call, 3, !dbg !39 + %tobool6.not = icmp eq i32 %1, 0, !dbg !39 + br i1 %tobool6.not, label %if.end9, label %if.then7, !dbg !40 + +if.then7: + %mul8 = mul nsw i32 %call, 3, !dbg !41 + tail call void @work(i32 %mul8), !dbg !42 + br label %if.end9, !dbg !43 + +if.end9: + %add.1 = or i32 %mul, 1, !dbg !44 + %call.1 = tail call i32 @bar(i32 %add.1), !dbg !32 + %2 = and i32 %call.1, 1, !dbg !33 + %tobool.not.1 = icmp eq i32 %2, 0, !dbg !33 + br i1 %tobool.not.1, label %if.end.1, label %if.then.1, !dbg !35 + +for.end12: + ret void, !dbg !45 + +if.then.1: + %mul4.1 = shl nsw i32 %call.1, 1, !dbg !36 + tail call void @work(i32 %mul4.1), !dbg !37 + br label %if.end.1, !dbg !38 + +if.end.1: + %3 = and i32 %call.1, 3, !dbg !39 + %tobool6.not.1 = icmp eq i32 %3, 0, !dbg !39 + br i1 %tobool6.not.1, label %if.end9.1, label %if.then7.1, !dbg !40 + +if.then7.1: + %mul8.1 = mul nsw i32 %call.1, 3, !dbg !41 + tail call void @work(i32 %mul8.1), !dbg !42 + br label %if.end9.1, !dbg !43 + +if.end9.1: + %add.2 = or i32 
%mul, 2, !dbg !44 + %call.2 = tail call i32 @bar(i32 %add.2), !dbg !32 + %4 = and i32 %call.2, 1, !dbg !33 + %tobool.not.2 = icmp eq i32 %4, 0, !dbg !33 + br i1 %tobool.not.2, label %if.end.2, label %if.then.2, !dbg !35 + +if.then.2: + %mul4.2 = shl nsw i32 %call.2, 1, !dbg !36 + tail call void @work(i32 %mul4.2), !dbg !37 + br label %if.end.2, !dbg !38 + +if.end.2: + %5 = and i32 %call.2, 3, !dbg !39 + %tobool6.not.2 = icmp eq i32 %5, 0, !dbg !39 + br i1 %tobool6.not.2, label %if.end9.2, label %if.then7.2, !dbg !40 + +if.then7.2: + %mul8.2 = mul nsw i32 %call.2, 3, !dbg !41 + tail call void @work(i32 %mul8.2), !dbg !42 + br label %if.end9.2, !dbg !43 + +if.end9.2: + %add.3 = or i32 %mul, 3, !dbg !44 + %call.3 = tail call i32 @bar(i32 %add.3), !dbg !32 + %6 = and i32 %call.3, 1, !dbg !33 + %tobool.not.3 = icmp eq i32 %6, 0, !dbg !33 + br i1 %tobool.not.3, label %if.end.3, label %if.then.3, !dbg !35 + +if.then.3: + %mul4.3 = shl nsw i32 %call.3, 1, !dbg !36 + tail call void @work(i32 %mul4.3), !dbg !37 + br label %if.end.3, !dbg !38 + +if.end.3: + %7 = and i32 %call.3, 3, !dbg !39 + %tobool6.not.3 = icmp eq i32 %7, 0, !dbg !39 + br i1 %tobool6.not.3, label %if.end9.3, label %if.then7.3, !dbg !40 + +if.then7.3: + %mul8.3 = mul nsw i32 %call.3, 3, !dbg !41 + tail call void @work(i32 %mul8.3), !dbg !42 + br label %if.end9.3, !dbg !43 + +if.end9.3: + %inc11 = add nuw nsw i32 %j.012, 1, !dbg !46 + %exitcond.not = icmp eq i32 %inc11, 48, !dbg !48 + br i1 %exitcond.not, label %for.end12, label %for.cond1.preheader, !dbg !30, !llvm.loop !49 +} + + +attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" 
"unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "unroll.c", directory: "a/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 4, column: 3, scope: !7) +!10 = !DILocation(line: 5, column: 5, scope: !7) +!11 = !{!12, !12, i64 0} +!12 = !{!"int", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple 
C/C++ TBAA"} +!15 = !DILocation(line: 6, column: 10, scope: !7) +!16 = !DILocation(line: 7, column: 1, scope: !7) +!17 = !DILocation(line: 6, column: 3, scope: !18) +!18 = !DILexicalBlockFile(scope: !7, file: !1, discriminator: 1) +!19 = distinct !DISubprogram(name: "work", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!20 = !DILocation(line: 11, column: 7, scope: !19) +!21 = !DILocation(line: 11, column: 11, scope: !22) +!22 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 1) +!23 = !DILocation(line: 11, column: 11, scope: !24) +!24 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 2) +!25 = !DILocation(line: 11, column: 7, scope: !26) +!26 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 3) +!27 = !DILocation(line: 0, scope: !22) +!28 = !DILocation(line: 15, column: 1, scope: !19) +!29 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !8, scopeLine: 17, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!30 = !DILocation(line: 19, column: 3, scope: !31) +!31 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 2) +!32 = !DILocation(line: 21, column: 16, scope: !31) +!33 = !DILocation(line: 22, column: 14, scope: !34) +!34 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 1) +!35 = !DILocation(line: 22, column: 11, scope: !31) +!36 = !DILocation(line: 23, column: 16, scope: !29) +!37 = !DILocation(line: 23, column: 9, scope: !34) +!38 = !DILocation(line: 23, column: 9, scope: !31) +!39 = !DILocation(line: 24, column: 14, scope: !34) +!40 = !DILocation(line: 24, column: 11, scope: !31) +!41 = !DILocation(line: 25, column: 16, scope: !29) +!42 = !DILocation(line: 25, column: 9, scope: !34) +!43 = !DILocation(line: 25, column: 9, scope: !31) +!44 = !DILocation(line: 21, column: 21, scope: !34) +!45 = 
!DILocation(line: 27, column: 1, scope: !29) +!46 = !DILocation(line: 19, column: 24, scope: !47) +!47 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 3) +!48 = !DILocation(line: 19, column: 17, scope: !34) +!49 = distinct !{!49, !50, !51} +!50 = !DILocation(line: 19, column: 3, scope: !29) +!51 = !DILocation(line: 26, column: 3, scope: !29) +!52 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 29, type: !8, scopeLine: 29, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!53 = !DILocation(line: 31, column: 3, scope: !54) +!54 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 2) +!55 = !DILocation(line: 32, column: 5, scope: !52) +!56 = !DILocation(line: 31, column: 30, scope: !57) +!57 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 3) +!58 = !DILocation(line: 31, column: 17, scope: !59) +!59 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 1) +!60 = distinct !{!60, !61, !62} +!61 = !DILocation(line: 31, column: 3, scope: !52) +!62 = !DILocation(line: 33, column: 3, scope: !52) +!63 = !DILocation(line: 34, column: 1, scope: !52) Index: llvm/test/Transforms/SampleProfile/Inputs/fsafdo.prof =================================================================== --- /dev/null +++ llvm/test/Transforms/SampleProfile/Inputs/fsafdo.prof @@ -0,0 +1,35 @@ +work:33383580:1068858 + 1: 981870 + 5: 981870 +foo:22388581:3449 + 0: 3449 + 2.1: 204820 + 4: 213086 bar:205247 + 4.2013265920: 222893 bar:218378 + 4.2281701376: 214552 bar:217479 + 4.2550136832: 210692 bar:220056 + 5: 213086 + 5.1207959552: 210692 + 5.1610612736: 202301 + 5.2952790016: 222893 + 6: 4780 + 6.268435456: 202301 work:198259 + 6.1073741824: 222893 work:231680 + 6.2147483648: 4780 + 7: 219065 + 7.134217728: 217053 + 7.2013265920: 183304 + 7.3758096384: 222101 + 8: 4780 + 8.2818572288: 222101 work:238765 + 8.3489660928: 183304 work:181615 + 8.4160749568: 217053 work:218539 + 10: 3281 
+bar:7622325:861160 + 2: 846925 + 3: 846925 +main:16419:0 + 0: 0 + 2.1: 3280 + 3: 3299 foo:3449 + 5: 0 Index: llvm/test/Transforms/SampleProfile/fsafdo_test.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SampleProfile/fsafdo_test.ll @@ -0,0 +1,230 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fsafdo.extbinary.afdo | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -sample-profile -profile-isfs -sample-profile-file=%S/Inputs/fsafdo.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +@sum = dso_local local_unnamed_addr global i32 0, align 4 + +declare i32 @bar(i32 %i) #0 +declare void @work(i32 %i) #2 + +define dso_local void @foo() #0 !dbg !29 { +; CHECK: Printing analysis {{.*}} for function 'foo': + +entry: + br label %for.cond1.preheader, !dbg !30 +; CHECK: edge entry -> for.cond1.preheader probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +for.cond1.preheader: + %j.012 = phi i32 [ 0, %entry ], [ %inc11, %if.end9.3 ] + %mul = mul nuw nsw i32 %j.012, 48 + %call = tail call i32 @bar(i32 %mul), !dbg !32 + %0 = and i32 %call, 1, !dbg !33 + %tobool.not = icmp eq i32 %0, 0, !dbg !33 + br i1 %tobool.not, label %if.end, label %if.then, !dbg !35 +; CHECK: edge for.cond1.preheader -> if.end probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge for.cond1.preheader -> if.then probability is 0x409d9d48 / 0x80000000 = 50.48% + + +if.then: + %mul4 = shl nsw i32 %call, 1, !dbg !36 + tail call void @work(i32 %mul4), !dbg !37 + br label %if.end, !dbg !38 +; CHECK: edge if.then -> if.end probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end: + %1 = and i32 %call, 3, !dbg !39 + %tobool6.not = icmp eq i32 %1, 0, !dbg !39 + br i1 %tobool6.not, label %if.end9, label %if.then7, !dbg !40 +; CHECK: edge if.end -> if.end9 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge 
if.end -> if.then7 probability is 0x5d39453d / 0x80000000 = 72.83% + + +if.then7: + %mul8 = mul nsw i32 %call, 3, !dbg !41 + tail call void @work(i32 %mul8), !dbg !42 + br label %if.end9, !dbg !43 +; CHECK: edge if.then7 -> if.end9 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9: + %add.1 = or i32 %mul, 1, !dbg !44 + %call.1 = tail call i32 @bar(i32 %add.1), !dbg !32 + %2 = and i32 %call.1, 1, !dbg !33 + %tobool.not.1 = icmp eq i32 %2, 0, !dbg !33 + br i1 %tobool.not.1, label %if.end.1, label %if.then.1, !dbg !35 +; CHECK: edge if.end9 -> if.end.1 probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge if.end9 -> if.then.1 probability is 0x409d9d48 / 0x80000000 = 50.48% + +for.end12: + ret void, !dbg !45 + +if.then.1: + %mul4.1 = shl nsw i32 %call.1, 1, !dbg !36 + tail call void @work(i32 %mul4.1), !dbg !37 + br label %if.end.1, !dbg !38 +; CHECK: edge if.then.1 -> if.end.1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end.1: + %3 = and i32 %call.1, 3, !dbg !39 + %tobool6.not.1 = icmp eq i32 %3, 0, !dbg !39 + br i1 %tobool6.not.1, label %if.end9.1, label %if.then7.1, !dbg !40 +; CHECK: edge if.end.1 -> if.end9.1 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end.1 -> if.then7.1 probability is 0x5d39453d / 0x80000000 = 72.83% + +if.then7.1: + %mul8.1 = mul nsw i32 %call.1, 3, !dbg !41 + tail call void @work(i32 %mul8.1), !dbg !42 + br label %if.end9.1, !dbg !43 +; CHECK: edge if.then7.1 -> if.end9.1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9.1: + %add.2 = or i32 %mul, 2, !dbg !44 + %call.2 = tail call i32 @bar(i32 %add.2), !dbg !32 + %4 = and i32 %call.2, 1, !dbg !33 + %tobool.not.2 = icmp eq i32 %4, 0, !dbg !33 + br i1 %tobool.not.2, label %if.end.2, label %if.then.2, !dbg !35 +; CHECK: edge if.end9.1 -> if.end.2 probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge if.end9.1 -> if.then.2 probability is 0x409d9d48 / 0x80000000 = 50.48% + +if.then.2: + %mul4.2 = 
shl nsw i32 %call.2, 1, !dbg !36 + tail call void @work(i32 %mul4.2), !dbg !37 + br label %if.end.2, !dbg !38 +; CHECK: edge if.then.2 -> if.end.2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end.2: + %5 = and i32 %call.2, 3, !dbg !39 + %tobool6.not.2 = icmp eq i32 %5, 0, !dbg !39 + br i1 %tobool6.not.2, label %if.end9.2, label %if.then7.2, !dbg !40 +; CHECK: edge if.end.2 -> if.end9.2 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end.2 -> if.then7.2 probability is 0x5d39453d / 0x80000000 = 72.83% + +if.then7.2: + %mul8.2 = mul nsw i32 %call.2, 3, !dbg !41 + tail call void @work(i32 %mul8.2), !dbg !42 + br label %if.end9.2, !dbg !43 +; CHECK: edge if.then7.2 -> if.end9.2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end9.2: + %add.3 = or i32 %mul, 3, !dbg !44 + %call.3 = tail call i32 @bar(i32 %add.3), !dbg !32 + %6 = and i32 %call.3, 1, !dbg !33 + %tobool.not.3 = icmp eq i32 %6, 0, !dbg !33 + br i1 %tobool.not.3, label %if.end.3, label %if.then.3, !dbg !35 +; CHECK: edge if.end9.2 -> if.end.3 probability is 0x3f6262b8 / 0x80000000 = 49.52% +; CHECK: edge if.end9.2 -> if.then.3 probability is 0x409d9d48 / 0x80000000 = 50.48% + +if.then.3: + %mul4.3 = shl nsw i32 %call.3, 1, !dbg !36 + tail call void @work(i32 %mul4.3), !dbg !37 + br label %if.end.3, !dbg !38 +; CHECK: edge if.then.3 -> if.end.3 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +if.end.3: + %7 = and i32 %call.3, 3, !dbg !39 + %tobool6.not.3 = icmp eq i32 %7, 0, !dbg !39 + br i1 %tobool6.not.3, label %if.end9.3, label %if.then7.3, !dbg !40 +; CHECK: edge if.end.3 -> if.end9.3 probability is 0x22c6bac3 / 0x80000000 = 27.17% +; CHECK: edge if.end.3 -> if.then7.3 probability is 0x5d39453d / 0x80000000 = 72.83% + +if.then7.3: + %mul8.3 = mul nsw i32 %call.3, 3, !dbg !41 + tail call void @work(i32 %mul8.3), !dbg !42 + br label %if.end9.3, !dbg !43 +; CHECK: edge if.then7.3 -> if.end9.3 probability is 0x80000000 / 0x80000000 = 100.00% 
[HOT edge] + +if.end9.3: + %inc11 = add nuw nsw i32 %j.012, 1, !dbg !46 + %exitcond.not = icmp eq i32 %inc11, 48, !dbg !48 + br i1 %exitcond.not, label %for.end12, label %for.cond1.preheader, !dbg !30, !llvm.loop !49 +; CHECK: edge if.end9.3 -> for.end12 probability is 0x00834dd9 / 0x80000000 = 0.40% +; CHECK: edge if.end9.3 -> for.cond1.preheader probability is 0x7f7cb227 / 0x80000000 = 99.60% [HOT edge] +} + +define dso_local i32 @main() #3 !dbg !52 { +entry: + br label %for.body, !dbg !53 + +for.body: + %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + tail call void @foo(), !dbg !55 + %inc = add nuw nsw i32 %i.03, 1, !dbg !56 + %exitcond.not = icmp eq i32 %inc, 10000000, !dbg !58 + br i1 %exitcond.not, label %for.end, label %for.body, !dbg !53, !llvm.loop !60 + +for.end: + ret i32 0, !dbg !63 +} + + +attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile"} +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "unroll.c", directory: "a/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 4, column: 3, scope: !7) +!10 = !DILocation(line: 5, column: 5, scope: !7) +!11 = !{!12, !12, i64 0} +!12 = !{!"int", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 6, column: 10, scope: !7) +!16 = !DILocation(line: 7, column: 1, scope: !7) +!17 = !DILocation(line: 6, column: 3, scope: !18) +!18 = !DILexicalBlockFile(scope: !7, file: !1, discriminator: 1) +!19 = distinct !DISubprogram(name: "work", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!20 = !DILocation(line: 11, column: 7, scope: !19) +!21 = !DILocation(line: 11, column: 11, scope: !22) +!22 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 1) +!23 = !DILocation(line: 11, column: 11, scope: !24) +!24 = !DILexicalBlockFile(scope: !19, file: !1, 
discriminator: 2) +!25 = !DILocation(line: 11, column: 7, scope: !26) +!26 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 3) +!27 = !DILocation(line: 0, scope: !22) +!28 = !DILocation(line: 15, column: 1, scope: !19) +!29 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !8, scopeLine: 17, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!30 = !DILocation(line: 19, column: 3, scope: !31) +!31 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 2) +!32 = !DILocation(line: 21, column: 16, scope: !31) +!33 = !DILocation(line: 22, column: 14, scope: !34) +!34 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 1) +!35 = !DILocation(line: 22, column: 11, scope: !31) +!36 = !DILocation(line: 23, column: 16, scope: !29) +!37 = !DILocation(line: 23, column: 9, scope: !34) +!38 = !DILocation(line: 23, column: 9, scope: !31) +!39 = !DILocation(line: 24, column: 14, scope: !34) +!40 = !DILocation(line: 24, column: 11, scope: !31) +!41 = !DILocation(line: 25, column: 16, scope: !29) +!42 = !DILocation(line: 25, column: 9, scope: !34) +!43 = !DILocation(line: 25, column: 9, scope: !31) +!44 = !DILocation(line: 21, column: 21, scope: !34) +!45 = !DILocation(line: 27, column: 1, scope: !29) +!46 = !DILocation(line: 19, column: 24, scope: !47) +!47 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 3) +!48 = !DILocation(line: 19, column: 17, scope: !34) +!49 = distinct !{!49, !50, !51} +!50 = !DILocation(line: 19, column: 3, scope: !29) +!51 = !DILocation(line: 26, column: 3, scope: !29) +!52 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 29, type: !8, scopeLine: 29, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!53 = !DILocation(line: 31, column: 3, scope: !54) +!54 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 2) +!55 = !DILocation(line: 32, column: 5, scope: !52) 
+!56 = !DILocation(line: 31, column: 30, scope: !57) +!57 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 3) +!58 = !DILocation(line: 31, column: 17, scope: !59) +!59 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 1) +!60 = distinct !{!60, !61, !62} +!61 = !DILocation(line: 31, column: 3, scope: !52) +!62 = !DILocation(line: 33, column: 3, scope: !52) +!63 = !DILocation(line: 34, column: 1, scope: !52) Index: llvm/tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- llvm/tools/llvm-profdata/llvm-profdata.cpp +++ llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -21,6 +21,7 @@ #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/FSAFDODiscriminator.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" @@ -451,6 +452,8 @@ const uint64_t ColdPercentileIdx = 15; const uint64_t HotPercentileIdx = 11; +static uint32_t MaskHighBitFrom = 31; + /// Adjust the instr profile in \p WC based on the sample profile in /// \p Reader. 
static void @@ -547,6 +550,7 @@ if (std::error_code EC = ReaderOrErr.getError()) exitWithErrorCode(EC, SampleFilename); auto Reader = std::move(ReaderOrErr.get()); + Reader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); if (std::error_code EC = Reader->read()) exitWithErrorCode(EC, SampleFilename); @@ -573,13 +577,16 @@ Result.setName(Remapper(Samples.getName())); Result.addTotalSamples(Samples.getTotalSamples()); Result.addHeadSamples(Samples.getHeadSamples()); + + uint32_t DiscriminatorMask = getN1Bits(MaskHighBitFrom); for (const auto &BodySample : Samples.getBodySamples()) { - Result.addBodySamples(BodySample.first.LineOffset, - BodySample.first.Discriminator, + uint32_t MaskedDiscriminator = + BodySample.first.Discriminator & DiscriminatorMask; + Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator, BodySample.second.getSamples()); for (const auto &Target : BodySample.second.getCallTargets()) { Result.addCalledTargetSamples(BodySample.first.LineOffset, - BodySample.first.Discriminator, + MaskedDiscriminator, Remapper(Target.first()), Target.second); } } @@ -689,6 +696,7 @@ // merged profile map. Readers.push_back(std::move(ReaderOrErr.get())); const auto Reader = Readers.back().get(); + Reader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); if (std::error_code EC = Reader->read()) { warnOrExitGivenError(FailMode, EC, Input.Filename); Readers.pop_back(); @@ -907,18 +915,25 @@ "sample profile, if the ratio of the number of zero counters " "divided by the the total number of counters is above the " "threshold, the profile of the function will be regarded as " - "being harmful for performance and will be dropped. ")); + "being harmful for performance and will be dropped.")); cl::opt SupplMinSizeThreshold( "suppl-min-size-threshold", cl::init(10), cl::Hidden, cl::desc("If the size of a function is smaller than the threshold, " "assume it can be inlined by PGO early inliner and it won't " - "be adjusted based on sample profile.
")); + "be adjusted based on sample profile.")); cl::opt InstrProfColdThreshold( "instr-prof-cold-threshold", cl::init(0), cl::Hidden, cl::desc("User specified cold threshold for instr profile which will " - "override the cold threshold got from profile summary. ")); + "override the cold threshold got from profile summary.")); + cl::opt MaskHighBitFromVal( + "mask-highbit-from", cl::init(31), cl::Hidden, + cl::desc("Zero out the discriminator bits from this value (0 based); " + "for example, value 11 will only use the base discriminators; " + "17 will use the base and the second round; 23 will use the " + "first 3 rounds.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); + MaskHighBitFrom = MaskHighBitFromVal.getValue(); WeightedFileVector WeightedInputs; for (StringRef Filename : InputFilenames) @@ -1588,6 +1603,7 @@ using namespace sampleprof; StringMap BaseFuncProf; + const auto &BaseProfiles = BaseReader->getProfiles(); for (const auto &BaseFunc : BaseProfiles) { BaseFuncProf.try_emplace(BaseFunc.second.getNameWithContext(), @@ -1870,6 +1886,9 @@ BaseReader = std::move(BaseReaderOrErr.get()); TestReader = std::move(TestReaderOrErr.get()); + BaseReader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); + TestReader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); + if (std::error_code EC = BaseReader->read()) exitWithErrorCode(EC, BaseFilename); if (std::error_code EC = TestReader->read()) @@ -2381,6 +2400,8 @@ auto Reader = std::move(ReaderOrErr.get()); + Reader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); + if (ShowSectionInfoOnly) { showSectionInfo(Reader.get(), OS); return 0; @@ -2472,8 +2493,15 @@ cl::desc("Show the information of each section in the sample profile.
" "The flag is only usable when the sample profile is in " "extbinary format")); + cl::opt MaskHighBitFrom1( + "mask-highbit-from", cl::init(31), cl::Hidden, + cl::desc("Zero out the discriminator bits from this value (0 based); " + "for example, value 11 will only use the base discriminators; " + "17 will use the base and the second round; 23 will use the " + "first 3 rounds.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); + MaskHighBitFrom = MaskHighBitFrom1.getValue(); if (OutputFilename.empty()) OutputFilename = "-"; Index: llvm/unittests/ProfileData/SampleProfTest.cpp =================================================================== --- llvm/unittests/ProfileData/SampleProfTest.cpp +++ llvm/unittests/ProfileData/SampleProfTest.cpp @@ -9,6 +9,7 @@ #include "llvm/ProfileData/SampleProf.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -61,6 +62,8 @@ ASSERT_TRUE(NoError(ReaderOrErr.getError())); Reader = std::move(ReaderOrErr.get()); Reader->setModule(&M); + Reader->setDiscriminatorMaskedBitFrom( + DILocation::getBaseDiscriminatorBits()); } TempFile createRemapFile() {