diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -30,24 +30,61 @@ namespace llvm { -// The cluster information for a machine basic block. -struct BBClusterInfo { - // Unique ID for this basic block. +// This structure represents a unique ID for every block specified in the +// profile. +struct ProfileBBID { unsigned BBID; + unsigned CloneID; +}; + +// Provides DenseMapInfo for ProfileBBID. +template <> struct DenseMapInfo { + static inline ProfileBBID getEmptyKey() { + unsigned EmptyKey = DenseMapInfo::getEmptyKey(); + return ProfileBBID{EmptyKey, EmptyKey}; + } + static inline ProfileBBID getTombstoneKey() { + unsigned TombstoneKey = DenseMapInfo::getTombstoneKey(); + return ProfileBBID{TombstoneKey, TombstoneKey}; + } + static unsigned getHashValue(const ProfileBBID &Val) { + std::pair PairVal = + std::make_pair(Val.BBID, Val.CloneID); + return DenseMapInfo>::getHashValue(PairVal); + } + static bool isEqual(const ProfileBBID &LHS, const ProfileBBID &RHS) { + return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && + DenseMapInfo::isEqual(LHS.CloneID, RHS.CloneID); + } +}; + +// This struct represents the cluster information for a machine basic block, +// which is specifed by a unique ID. +template struct BBProfile { + // Basic block ID. + BBIDType BasicBlockID; // Cluster ID this basic block belongs to. unsigned ClusterID; // Position of basic block within the cluster. unsigned PositionInCluster; }; -using ProgramBBClusterInfoMapTy = StringMap>; +// This represents the profile for one function. +struct RawFunctionProfile { + // BB Cluster information specified by `ProfileBBID`s (before cloning). + SmallVector> RawBBProfiles; + // Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along + // the edge a -> b. + SmallVector> ClonePaths; +}; class BasicBlockSectionsProfileReader : public ImmutablePass { public: static char ID; BasicBlockSectionsProfileReader(const MemoryBuffer *Buf) - : ImmutablePass(ID), MBuf(Buf) { + : ImmutablePass(ID), MBuf(Buf), + LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#') { initializeBasicBlockSectionsProfileReaderPass( *PassRegistry::getPassRegistry()); }; @@ -71,8 +108,8 @@ // function. If the first element is true and the second element is empty, it // means unique basic block sections are desired for all basic blocks of the // function. - std::pair> - getBBClusterInfoForFunction(StringRef FuncName) const; + std::pair + getRawProfileForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for matching functions. @@ -84,23 +121,37 @@ return R == FuncAliasMap.end() ? FuncName : R->second; } + // Returns a profile parsing error for the current line. + Error createProfileParseError(Twine Message) const { + return make_error( + Twine("invalid profile " + MBuf->getBufferIdentifier() + " at line " + + Twine(LineIt.line_number()) + ": " + Message), + inconvertibleErrorCode()); + }; + + // Parses a `ProfileBBID` from `S`. + Expected parseProfileBBID(StringRef S) const; + // Reads the basic block sections profile for functions in this module. Error ReadProfile(); // This contains the basic-block-sections profile. const MemoryBuffer *MBuf = nullptr; + // Iterator to the line being parsed. + line_iterator LineIt; + // Map from every function name in the module to its debug info filename or // empty string if no debug info is available. StringMap> FunctionNameToDIFilename; // This encapsulates the BB cluster information for the whole program. // - // For every function name, it contains the cluster information for (all or - // some of) its basic blocks. The cluster information for every basic block - // includes its cluster ID along with the position of the basic block in that - // cluster. - ProgramBBClusterInfoMapTy ProgramBBClusterInfo; + // For every function name, it contains the cloning and cluster information + // for (all or some of) its basic blocks. The cluster information for every + // basic block includes its cluster ID along with the position of the basic + // block in that cluster. + StringMap RawProgramProfile; // Some functions have alias names. We use this map to find the main alias // name for which we have mapping in ProgramBBClusterInfo. diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -25,6 +25,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include +#include using namespace llvm; @@ -32,18 +33,36 @@ INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", "Reads and parses a basic block sections profile.", false, false) +Expected +BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const { + SmallVector Parts; + S.split(Parts, '.'); + if (Parts.size() > 2) + return createProfileParseError(Twine("unable to parse basic block id: '") + + S + "'"); + unsigned long long BBID; + if (getAsUnsignedInteger(Parts[0], 10, BBID)) + return createProfileParseError( + Twine("unable to parse BB id: '" + Parts[0]) + + "': unsigned integer expected"); + unsigned long long CloneID = 0; + if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) + return createProfileParseError(Twine("unable to parse clone id: '") + + Parts[1] + "': unsigned integer expected"); + return ProfileBBID{static_cast(BBID), + static_cast(CloneID)}; +} bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { - return getBBClusterInfoForFunction(FuncName).first; + return getRawProfileForFunction(FuncName).first; } -std::pair> -BasicBlockSectionsProfileReader::getBBClusterInfoForFunction( +std::pair +BasicBlockSectionsProfileReader::getRawProfileForFunction( StringRef FuncName) const { - auto R = ProgramBBClusterInfo.find(getAliasName(FuncName)); - return R != ProgramBBClusterInfo.end() - ? std::pair(true, R->second) - : std::pair(false, SmallVector{}); + auto R = RawProgramProfile.find(getAliasName(FuncName)); + return R != RawProgramProfile.end() ? std::pair(true, R->second) + : std::pair(false, RawFunctionProfile()); } // Basic Block Sections can be enabled for a subset of machine basic blocks. @@ -65,16 +84,8 @@ // !!4 Error BasicBlockSectionsProfileReader::ReadProfile() { assert(MBuf); - line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); - - auto invalidProfileError = [&](auto Message) { - return make_error( - Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + - Twine(LineIt.line_number()) + ": " + Message), - inconvertibleErrorCode()); - }; - auto FI = ProgramBBClusterInfo.end(); + auto FI = RawProgramProfile.end(); // Current cluster ID corresponding to this function. unsigned CurrentCluster = 0; @@ -83,7 +94,7 @@ // Temporary set to ensure every basic block ID appears once in the clusters // of a function. - SmallSet FuncBBIDs; + DenseSet FuncBasicBlockIDs; for (; !LineIt.is_at_eof(); ++LineIt) { StringRef S(*LineIt); @@ -92,44 +103,21 @@ // Check for the leading "!" if (!S.consume_front("!") || S.empty()) break; - // Check for second "!" which indicates a cluster of basic blocks. - if (S.consume_front("!")) { - // Skip the profile when we the profile iterator (FI) refers to the - // past-the-end element. - if (FI == ProgramBBClusterInfo.end()) - continue; - SmallVector BBIDs; - S.split(BBIDs, ' '); - // Reset current cluster position. - CurrentPosition = 0; - for (auto BBIDStr : BBIDs) { - unsigned long long BBID; - if (getAsUnsignedInteger(BBIDStr, 10, BBID)) - return invalidProfileError(Twine("Unsigned integer expected: '") + - BBIDStr + "'."); - if (!FuncBBIDs.insert(BBID).second) - return invalidProfileError(Twine("Duplicate basic block id found '") + - BBIDStr + "'."); - if (BBID == 0 && CurrentPosition) - return invalidProfileError("Entry BB (0) does not begin a cluster."); - - FI->second.emplace_back( - BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++}); - } - CurrentCluster++; - } else { - // This is a function name specifier. It may include a debug info filename - // specifier starting with `M=`. + + // Check for the second "!" which indicates a cluster of basic blocks. + if (!S.consume_front("!")) { + // A single "!" represents a function name specifier. + // It may include a debug info filename specifier starting with `M=`. auto [AliasesStr, DIFilenameStr] = S.split(' '); SmallString<128> DIFilename; if (DIFilenameStr.startswith("M=")) { DIFilename = sys::path::remove_leading_dotslash(DIFilenameStr.substr(2)); if (DIFilename.empty()) - return invalidProfileError("Empty module name specifier."); + return createProfileParseError("empty module name specifier"); } else if (!DIFilenameStr.empty()) { - return invalidProfileError("Unknown string found: '" + DIFilenameStr + - "'."); + return createProfileParseError("unknown string found: '" + + DIFilenameStr + "'"); } // Function aliases are separated using '/'. We use the first function // name for the cluster info mapping and delegate all other aliases to @@ -148,7 +136,7 @@ if (!FunctionFound) { // Skip the following profile by setting the profile iterator (FI) to // the past-the-end element. - FI = ProgramBBClusterInfo.end(); + FI = RawProgramProfile.end(); continue; } for (size_t i = 1; i < Aliases.size(); ++i) @@ -156,15 +144,60 @@ // Prepare for parsing clusters of this function name. // Start a new cluster map for this function name. - auto R = ProgramBBClusterInfo.try_emplace(Aliases.front()); + auto R = RawProgramProfile.try_emplace(Aliases.front()); // Report error when multiple profiles have been specified for the same // function. if (!R.second) - return invalidProfileError("Duplicate profile for function '" + - Aliases.front() + "'."); + return createProfileParseError("duplicate profile for function '" + + Aliases.front() + "'"); FI = R.first; CurrentCluster = 0; - FuncBBIDs.clear(); + FuncBasicBlockIDs.clear(); + continue; + } + // Skip the profile when the profile iterator (FI) refers to the + // past-the-end element. + if (FI == RawProgramProfile.end()) + continue; + + // Check for the third "!" which indicates a clone path. + if (!S.consume_front("!")) { + // Two "!"s represent a cluster of basic blocks. + SmallVector BasicBlockIDs; + S.split(BasicBlockIDs, ' '); + // Reset current cluster position. + CurrentPosition = 0; + for (auto BasicBlockIDStr : BasicBlockIDs) { + auto BasicBlockID = parseProfileBBID(BasicBlockIDStr); + if (!BasicBlockID) + return BasicBlockID.takeError(); + if (!FuncBasicBlockIDs.insert(*BasicBlockID).second) + return createProfileParseError( + Twine("duplicate basic block id found '") + BasicBlockIDStr + + "'"); + + if (!BasicBlockID->BBID && CurrentPosition) + return createProfileParseError( + "entry BB (0) does not begin a cluster."); + + FI->second.RawBBProfiles.emplace_back(BBProfile{ + *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); + } + CurrentCluster++; + continue; + } + + // Three "!"s Represent a clone path. + FI->second.ClonePaths.push_back({}); + SmallVector ClonePath; + S.split(ClonePath, ' '); + + for (auto BBIDStr : ClonePath) { + unsigned long long BBID = 0; + if (getAsUnsignedInteger(BBIDStr, 10, BBID)) + return createProfileParseError(Twine("unsigned integer expected: '") + + BBIDStr + "'"); + FI->second.ClonePaths.back().push_back(BBID); } } return Error::success(); diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll --- a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll @@ -3,25 +3,38 @@ ; RUN: echo '!!1 4' >> %t1 ; RUN: echo '!!1' >> %t1 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR1 -; CHECK-ERROR1: LLVM ERROR: Invalid profile {{.*}} at line 3: Duplicate basic block id found '1'. +; CHECK-ERROR1: LLVM ERROR: invalid profile {{.*}} at line 3: duplicate basic block id found '1' ; RUN: echo '!dummy1' > %t2 ; RUN: echo '!!4 0' >> %t2 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR2 -; CHECK-ERROR2: LLVM ERROR: Invalid profile {{.*}} at line 2: Entry BB (0) does not begin a cluster. +; CHECK-ERROR2: LLVM ERROR: invalid profile {{.*}} at line 2: entry BB (0) does not begin a cluster ; RUN: echo '!dummy1' > %t3 ; RUN: echo '!!-1' >> %t3 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t3 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR3 -; CHECK-ERROR3: LLVM ERROR: Invalid profile {{.*}} at line 2: Unsigned integer expected: '-1'. +; CHECK-ERROR3: LLVM ERROR: invalid profile {{.*}} at line 2: unable to parse BB id: '-1': unsigned integer expected ; RUN: echo '!dummy1 /path/to/filename' > %t4 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t4 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR4 -; CHECK-ERROR4: LLVM ERROR: Invalid profile {{.*}} at line 1: Unknown string found: '/path/to/filename'. +; CHECK-ERROR4: LLVM ERROR: invalid profile {{.*}} at line 1: unknown string found: '/path/to/filename' ; RUN: echo '!dummy2 M=test_dir/test_file' > %t5 ; RUN: echo '!dummy2 M=test_dir/test_file' >> %t5 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t5 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR5 -; CHECK-ERROR5: LLVM ERROR: Invalid profile {{.*}} at line 2: Duplicate profile for function 'dummy2'. -; RUN: echo '!dummy1 M=' >> %t6 +; CHECK-ERROR5: LLVM ERROR: invalid profile {{.*}} at line 2: duplicate profile for function 'dummy2' +; RUN: echo '!dummy1 M=' > %t6 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t6 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR6 -; CHECK-ERROR6: LLVM ERROR: Invalid profile {{.*}} at line 1: Empty module name specifier. +; CHECK-ERROR6: LLVM ERROR: invalid profile {{.*}} at line 1: empty module name specifier +; RUN: echo '!dummy1' > %t7 +; RUN: echo '!!0 1.1.1' >> %t7 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t7 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR7 +; CHECK-ERROR7: LLVM ERROR: invalid profile {{.*}} at line 2: unable to parse basic block id: '1.1.1' +; RUN: echo '!dummy1' > %t8 +; RUN: echo '!!0 1.a' >> %t8 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t8 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR8 +; CHECK-ERROR8: LLVM ERROR: invalid profile {{.*}} at line 2: unable to parse clone id: 'a' +; RUN: echo '!dummy1' > %t9 +; RUN: echo '!!0 1' >> %t9 +; RUN: echo '!!!1 2.1' >> %t9 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t9 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR9 +; CHECK-ERROR9: LLVM ERROR: invalid profile {{.*}} at line 3: unsigned integer expected: '2.1' define i32 @dummy1(i32 %x, i32 %y, i32 %z) { entry: