Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -4000,10 +4000,10 @@ """"""""""""" ``DICompileUnit`` nodes represent a compile unit. The ``enums:``, -``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:`` -fields are tuples containing the debug info to be emitted along with the compile -unit, regardless of code optimizations (some nodes are only emitted if there are -references to them from instructions). +``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:``, ``macros:`` +and ``debugInfoForProfiling:`` fields are tuples containing the debug info to be +emitted along with the compile unit, regardless of code optimizations (some +nodes are only emitted if there are references to them from instructions). .. code-block:: text Index: include/llvm/IR/DebugInfoMetadata.h =================================================================== --- include/llvm/IR/DebugInfoMetadata.h +++ include/llvm/IR/DebugInfoMetadata.h @@ -1315,10 +1315,48 @@ /// /// DWARF discriminators distinguish identical file locations between /// instructions that are on different basic blocks. + /// + /// There are 3 components stored in discriminator, from lower bits: + /// + /// Base discriminator: assigned by AddDiscriminators pass to identify IRs + /// that are defined by the same source line, but + /// different basic blocks. + /// Duplication factor: assigned by optimizations that will scale down + /// the execution frequency of the original IR. + /// Copy Identifier: assigned by optimizations that clone the IR. + /// Each copy of the IR will be assigned an identifier. + /// + /// Encoding: + /// + /// The above 3 components are encoded into a 32bit unsigned integer in + /// order. If the lowest bit is 1, the current component is empty, and the + /// next component will start in the next bit. 
Otherwise, the current + /// component is non-empty, and its content starts in the next bit. The + /// length of each component is either 5 bits or 12 bits: if the 7th bit + /// is 0, the bit 2~6 (5 bits) are used to represent the component; if the + /// 7th bit is 1, the bit 2~6 (5 bits) and 8~14 (7 bits) are combined to + /// represent the component. + inline unsigned getDiscriminator() const; /// Returns a new DILocation with updated \p Discriminator. - inline DILocation *cloneWithDiscriminator(unsigned Discriminator) const; + inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const; + + /// Returns a new DILocation with updated base discriminator \p BD. + inline const DILocation *setBaseDiscriminator(unsigned BD) const; + + /// Returns the duplication factor stored in the discriminator. + inline unsigned getDuplicationFactor() const; + + /// Returns the copy identifier stored in the discriminator. + inline unsigned getCopyIdentifier() const; + + /// Returns the base discriminator stored in the discriminator. + inline unsigned getBaseDiscriminator() const; + + /// Returns a new DILocation with duplication factor \p DF encoded in the + /// discriminator. + inline const DILocation *cloneWithDuplicationFactor(unsigned DF) const; /// When two instructions are combined into a single instruction we also /// need to combine the original locations into a single location. @@ -1351,6 +1389,28 @@ static bool classof(const Metadata *MD) { return MD->getMetadataID() == DILocationKind; } + + /// With a given unsigned int \p U, use up to 13 bits to represent it. + /// old_bit 1~5 --> new_bit 1~5 + /// old_bit 6~12 --> new_bit 7~13 + /// new_bit_6 is 0 if higher bits (7~13) are all 0 + static unsigned getPrefixEncodingFromUnsigned(unsigned U) { + U &= 0xfff; + return U > 0x1f ? (((U & 0xfe0) << 1) | (U & 0x1f) | 0x20) : U; + } + + /// Reverse transformation of getPrefixEncodingFromUnsigned. 
+ static unsigned getUnsignedFromPrefixEncoding(unsigned U) { + return (U & 0x20) ? (((U >> 1) & 0xfe0) | (U & 0x1f)) : (U & 0x1f); + } + + /// Returns the next component stored in discriminator. + static unsigned getNextComponentInDiscriminator(unsigned D) { + if ((D & 1) == 0) + return D >> ((D & 0x40) ? 14 : 7); + else + return D >> 1; + } }; /// Subprogram description. @@ -1684,7 +1744,8 @@ return 0; } -DILocation *DILocation::cloneWithDiscriminator(unsigned Discriminator) const { +const DILocation * +DILocation::cloneWithDiscriminator(unsigned Discriminator) const { DIScope *Scope = getScope(); // Skip all parent DILexicalBlockFile that already have a discriminator // assigned. We do not want to have nested DILexicalBlockFiles that have @@ -1700,6 +1761,52 @@ getInlinedAt()); } +unsigned DILocation::getBaseDiscriminator() const { + unsigned D = getDiscriminator(); + if ((D & 1) == 0) + return getUnsignedFromPrefixEncoding(D >> 1); + else + return 0; +} + +unsigned DILocation::getDuplicationFactor() const { + unsigned D = getDiscriminator(); + D = getNextComponentInDiscriminator(D); + if (D == 0 || (D & 1)) + return 1; + else + return getUnsignedFromPrefixEncoding(D >> 1); +} + +unsigned DILocation::getCopyIdentifier() const { + return getUnsignedFromPrefixEncoding(getNextComponentInDiscriminator( + getNextComponentInDiscriminator(getDiscriminator()))); +} + +const DILocation *DILocation::setBaseDiscriminator(unsigned D) const { + if (D == 0) + return this; + else + return cloneWithDiscriminator(getPrefixEncodingFromUnsigned(D) << 1); +} + +const DILocation *DILocation::cloneWithDuplicationFactor(unsigned DF) const { + DF *= getDuplicationFactor(); + if (DF <= 1) + return this; + + unsigned BD = getBaseDiscriminator(); + unsigned CI = getCopyIdentifier() << (DF > 0x1f ? 14 : 7); + unsigned D = CI | (getPrefixEncodingFromUnsigned(DF) << 1); + + if (BD == 0) + D = (D << 1) | 1; + else + D = (D << (BD > 0x1f ? 
14 : 7)) | (getPrefixEncodingFromUnsigned(BD) << 1); + + return cloneWithDiscriminator(D); +} + class DINamespace : public DIScope { friend class LLVMContextImpl; friend class MDNode; Index: include/llvm/IR/Function.h =================================================================== --- include/llvm/IR/Function.h +++ include/llvm/IR/Function.h @@ -671,6 +671,9 @@ /// to \a DISubprogram. DISubprogram *getSubprogram() const; + /// Returns true if we should emit debug info for profiling. + bool isDebugInfoForProfiling() const; + private: void allocHungoffUselist(); template void setHungoffOperand(Constant *C); Index: lib/IR/Metadata.cpp =================================================================== --- lib/IR/Metadata.cpp +++ lib/IR/Metadata.cpp @@ -1459,6 +1459,15 @@ return cast_or_null(getMetadata(LLVMContext::MD_dbg)); } +bool Function::isDebugInfoForProfiling() const { + if (DISubprogram *SP = getSubprogram()) { + if (DICompileUnit *CU = SP->getUnit()) { + return CU->getDebugInfoForProfiling(); + } + } + return false; +} + void GlobalVariable::addDebugInfo(DIGlobalVariableExpression *GV) { addMetadata(LLVMContext::MD_dbg, *GV); } Index: lib/Transforms/Utils/AddDiscriminators.cpp =================================================================== --- lib/Transforms/Utils/AddDiscriminators.cpp +++ lib/Transforms/Utils/AddDiscriminators.cpp @@ -190,8 +190,8 @@ // discriminator is needed to distinguish both instructions. // Only the lowest 7 bits are used to represent a discriminator to fit // it in 1 byte ULEB128 representation. - unsigned Discriminator = (R.second ? ++LDM[L] : LDM[L]) & 0x7f; - I.setDebugLoc(DIL->cloneWithDiscriminator(Discriminator)); + unsigned Discriminator = R.second ? 
++LDM[L] : LDM[L]; + I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator)); DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" << DIL->getColumn() << ":" << Discriminator << " " << I << "\n"); @@ -216,8 +216,8 @@ Location L = std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine()); if (!CallLocations.insert(L).second) { - Current->setDebugLoc( - CurrentDIL->cloneWithDiscriminator((++LDM[L]) & 0x7f)); + unsigned Discriminator = ++LDM[L]; + Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator)); Changed = true; } } Index: lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- lib/Transforms/Utils/LoopUnroll.cpp +++ lib/Transforms/Utils/LoopUnroll.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" @@ -462,6 +463,12 @@ for (Loop *SubLoop : *L) LoopsToSimplify.insert(SubLoop); + if (Header->getParent()->isDebugInfoForProfiling()) + for (BasicBlock *BB : L->getBlocks()) + for (Instruction &I : *BB) + if (const DILocation *DIL = I.getDebugLoc()) + I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count)); + for (unsigned It = 1; It != Count; ++It) { std::vector NewBlocks; SmallDenseMap NewLoops; Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -616,6 +616,10 @@ /// vector of instructions. void addMetadata(ArrayRef To, Instruction *From); + /// \brief Set the debug location in the builder using the debug location in + /// the instruction. + void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr); + /// This is a helper class for maintaining vectorization state. 
It's used for /// mapping values from the original loop to their corresponding values in /// the new loop. Two mappings are maintained: one for vectorized values and @@ -865,12 +869,14 @@ return I; } -/// \brief Set the debug location in the builder using the debug location in the -/// instruction. -static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) { - if (const Instruction *Inst = dyn_cast_or_null(Ptr)) - B.SetCurrentDebugLocation(Inst->getDebugLoc()); - else +void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) { + if (const Instruction *Inst = dyn_cast_or_null(Ptr)) { + const DILocation *DIL = Inst->getDebugLoc(); + if (DIL && Inst->getFunction()->isDebugInfoForProfiling()) + B.SetCurrentDebugLocation(DIL->cloneWithDuplicationFactor(UF * VF)); + else + B.SetCurrentDebugLocation(DIL); + } else B.SetCurrentDebugLocation(DebugLoc()); } Index: test/Transforms/AddDiscriminators/basic.ll =================================================================== --- test/Transforms/AddDiscriminators/basic.ll +++ test/Transforms/AddDiscriminators/basic.ll @@ -58,5 +58,5 @@ ; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo" ; CHECK: ![[BLOCK:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3) ; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[BLOCKFILE:[0-9]+]]) -; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 1) +; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 2) ; CHECK: ![[END]] = !DILocation(line: 4, scope: ![[FOO]]) Index: test/Transforms/AddDiscriminators/call-nested.ll =================================================================== --- test/Transforms/AddDiscriminators/call-nested.ll +++ test/Transforms/AddDiscriminators/call-nested.ll @@ -47,4 +47,4 @@ !14 = !DILocation(line: 4, column: 3, scope: !4) ; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 10, scope: ![[CALL2BLOCK:[0-9]+]]) -; CHECK: ![[CALL2BLOCK]] = 
!DILexicalBlockFile({{.*}} discriminator: 1) +; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2) Index: test/Transforms/AddDiscriminators/call.ll =================================================================== --- test/Transforms/AddDiscriminators/call.ll +++ test/Transforms/AddDiscriminators/call.ll @@ -5,7 +5,7 @@ ; #1 void bar(); ; #2 ; #3 void foo() { -; #4 bar();bar()/*discriminator 1*/;bar()/*discriminator 2*/; +; #4 bar();bar()/*discriminator 2*/;bar()/*discriminator 4*/; ; #5 } ; Function Attrs: uwtable @@ -49,6 +49,6 @@ !13 = !DILocation(line: 5, column: 1, scope: !4) ; CHECK: ![[CALL1]] = !DILocation(line: 4, column: 9, scope: ![[CALL1BLOCK:[0-9]+]]) -; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1) +; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2) ; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 15, scope: ![[CALL2BLOCK:[0-9]+]]) -; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2) +; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4) Index: test/Transforms/AddDiscriminators/diamond.ll =================================================================== --- test/Transforms/AddDiscriminators/diamond.ll +++ test/Transforms/AddDiscriminators/diamond.ll @@ -10,7 +10,7 @@ ; #6 } ; bar(5): discriminator 0 -; bar(3): discriminator 1 +; bar(3): discriminator 2 ; Function Attrs: uwtable define void @_Z3fooi(i32 %i) #0 !dbg !4 { @@ -69,4 +69,4 @@ !20 = !DILocation(line: 6, column: 1, scope: !4) ; CHECK: ![[ELSE]] = !DILocation(line: 5, column: 18, scope: ![[ELSEBLOCK:[0-9]+]]) -; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1) +; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2) Index: test/Transforms/AddDiscriminators/first-only.ll =================================================================== --- test/Transforms/AddDiscriminators/first-only.ll +++ test/Transforms/AddDiscriminators/first-only.ll @@ -69,7 +69,7 
@@ !12 = !DILocation(line: 3, scope: !13) !13 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !11) -; CHECK: !DILexicalBlockFile(scope: ![[BLOCK2:[0-9]+]],{{.*}} discriminator: 1) +; CHECK: !DILexicalBlockFile(scope: ![[BLOCK2:[0-9]+]],{{.*}} discriminator: 2) !14 = !DILocation(line: 4, scope: !13) ; CHECK: ![[BLOCK2]] = distinct !DILexicalBlock(scope: ![[BLOCK1]],{{.*}} line: 3) Index: test/Transforms/AddDiscriminators/inlined.ll =================================================================== --- test/Transforms/AddDiscriminators/inlined.ll +++ test/Transforms/AddDiscriminators/inlined.ll @@ -62,8 +62,8 @@ !12 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, variables: !2) !13 = distinct !DILocation(line: 1, column: 17, scope: !14) ; CHECK: ![[BF:.*]] = !DILexicalBlockFile(scope: ![[LB1:[0-9]+]], -; CHECK-SAME: discriminator: 1) -!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 1) +; CHECK-SAME: discriminator: 2) +!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2) ; CHECK: ![[LB1]] = distinct !DILexicalBlock(scope: ![[LB2:[0-9]+]], ; CHECK-SAME: line: 1, column: 16) !15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 1, column: 16) Index: test/Transforms/AddDiscriminators/multiple.ll =================================================================== --- test/Transforms/AddDiscriminators/multiple.ll +++ test/Transforms/AddDiscriminators/multiple.ll @@ -67,6 +67,6 @@ !12 = !DILocation(line: 4, scope: !4) ; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[THENBLOCK:[0-9]+]]) -; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE:[0-9]+]],{{.*}} discriminator: 1) +; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE:[0-9]+]],{{.*}} discriminator: 2) ; CHECK: ![[ELSE]] = !DILocation(line: 3, scope: ![[ELSEBLOCK:[0-9]+]]) -; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE]],{{.*}} 
discriminator: 2) +; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE]],{{.*}} discriminator: 4) Index: test/Transforms/AddDiscriminators/oneline.ll =================================================================== --- test/Transforms/AddDiscriminators/oneline.ll +++ test/Transforms/AddDiscriminators/oneline.ll @@ -7,9 +7,9 @@ ; #3 } ; i == 3: discriminator 0 -; i == 5: discriminator 1 -; return 100: discriminator 2 -; return 99: discriminator 3 +; i == 5: discriminator 2 +; return 100: discriminator 4 +; return 99: discriminator 6 define i32 @_Z3fooi(i32 %i) #0 !dbg !4 { %1 = alloca i32, align 4 @@ -91,11 +91,11 @@ ; CHECK: ![[F:.*]] = distinct !DISubprogram(name: "foo", ; CHECK: ![[IF:.*]] = distinct !DILexicalBlock(scope: ![[F]],{{.*}}line: 2, column: 7) ; CHECK: ![[THEN1]] = !DILocation(line: 2, column: 17, scope: ![[THENBLOCK:[0-9]+]]) -; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 1) +; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 2) ; CHECK: ![[THEN2]] = !DILocation(line: 2, column: 19, scope: ![[THENBLOCK]]) ; CHECK: ![[THEN3]] = !DILocation(line: 2, column: 7, scope: ![[BRBLOCK:[0-9]+]]) -; CHECK: ![[BRBLOCK]] = !DILexicalBlockFile(scope: ![[F]],{{.*}} discriminator: 1) +; CHECK: ![[BRBLOCK]] = !DILexicalBlockFile(scope: ![[F]],{{.*}} discriminator: 2) ; CHECK: ![[ELSE]] = !DILocation(line: 2, column: 25, scope: ![[ELSEBLOCK:[0-9]+]]) -; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 2) +; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 4) ; CHECK: ![[COMBINE]] = !DILocation(line: 2, column: 42, scope: ![[COMBINEBLOCK:[0-9]+]]) -; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 3) +; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 6) Index: test/Transforms/LoopVectorize/discriminator.ll 
=================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/discriminator.ll @@ -0,0 +1,70 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck --check-prefix=LOOPVEC_4_1 %s +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=3 < %s | FileCheck --check-prefix=LOOPVEC_2_3 %s +; RUN: opt -S -loop-unroll -unroll-count=5 < %s | FileCheck --check-prefix=LOOPUNROLL_5 %s +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=4 -loop-unroll -unroll-count=2 < %s | FileCheck --check-prefix=LOOPVEC_UNROLL %s + +; Test if vectorization/unroll factor is recorded in discriminator. +; +; Original source code: +; 1 int *a; +; 2 int *b; +; 3 +; 4 void foo() { +; 5 for (int i = 0; i < 4096; i++) +; 6 a[i] += b[i]; +; 7 } + +@a = local_unnamed_addr global i32* null, align 8 +@b = local_unnamed_addr global i32* null, align 8 + +define void @_Z3foov() local_unnamed_addr #0 !dbg !6 { + %1 = load i32*, i32** @b, align 8, !dbg !8, !tbaa !9 + %2 = load i32*, i32** @a, align 8, !dbg !13, !tbaa !9 + br label %3, !dbg !14 + +;