Index: llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h =================================================================== --- llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -71,6 +71,7 @@ using PredRangeT = pred_range; using SuccRangeT = succ_range; static Function &getFunction(Function &F) { return F; } + static bool shouldIgnoreInst(const Instruction &Inst) { return false; } static const BasicBlock *getEntryBB(const Function *F) { return &F->getEntryBlock(); } @@ -133,7 +134,9 @@ SuccRangeT getSuccessors(BasicBlockT *BB) { return afdo_detail::IRTraits::getSuccessors(BB); } - + bool shouldIgnoreInst(const InstructionT &Inst) { + return afdo_detail::IRTraits::shouldIgnoreInst(Inst); + } unsigned getFunctionLoc(FunctionT &Func); virtual ErrorOr getInstWeight(const InstructionT &Inst); ErrorOr getInstWeightImpl(const InstructionT &Inst); @@ -308,6 +311,8 @@ const FunctionSamples *FS = findFunctionSamples(Inst); if (!FS) return std::error_code(); + if (shouldIgnoreInst(Inst)) + return std::error_code(); const DebugLoc &DLoc = Inst.getDebugLoc(); if (!DLoc) Index: llvm/lib/CodeGen/MIRFSDiscriminator.cpp =================================================================== --- llvm/lib/CodeGen/MIRFSDiscriminator.cpp +++ llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -30,6 +30,13 @@ #define DEBUG_TYPE "mirfs-discriminators" +// TODO(xur): Remove this option and related code once we make true as the +// default. +cl::opt<bool> ImprovedFSDiscriminator( + "improved-fs-discriminator", cl::Hidden, cl::init(false), + cl::desc("New FS discriminators encoding (incompatible with the original " + "encoding)")); + char MIRAddFSDiscriminators::ID = 0; INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE, @@ -42,11 +49,12 @@ return new MIRAddFSDiscriminators(P); } +// TODO(xur): Remove this once we switch to ImprovedFSDiscriminator. 
// Compute a hash value using debug line number, and the line numbers from the // inline stack. -static uint64_t getCallStackHash(const MachineBasicBlock &BB, - const MachineInstr &MI, - const DILocation *DIL) { +static uint64_t getCallStackHashV0(const MachineBasicBlock &BB, + const MachineInstr &MI, + const DILocation *DIL) { auto updateHash = [](const StringRef &Str) -> uint64_t { if (Str.empty()) return 0; @@ -62,6 +70,20 @@ return Ret; } +static uint64_t getCallStackHash(const DILocation *DIL) { + auto updateHash = [](const StringRef &Str) -> uint64_t { + if (Str.empty()) + return 0; + return MD5Hash(Str); + }; + uint64_t Ret = 0; + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { + Ret ^= updateHash(std::to_string(DIL->getLine())); + Ret ^= updateHash(DIL->getScope()->getSubprogram()->getLinkageName()); + } + return Ret; +} + // Traverse the CFG and assign FD discriminators. If two instructions // have the same lineno and discriminator, but residing in different BBs, // the latter instruction will get a new discriminator value. The new @@ -74,7 +96,8 @@ return false; bool Changed = false; - using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>; + using LocationDiscriminator = + std::tuple<StringRef, unsigned, unsigned, uint64_t>; using BBSet = DenseSet; using LocationDiscriminatorBBMap = DenseMap; using LocationDiscriminatorCurrPassMap = @@ -84,7 +107,11 @@ LocationDiscriminatorCurrPassMap LDCM; // Mask of discriminators before this pass. - unsigned BitMaskBefore = getN1Bits(LowBit); + // TODO(xur): simplify this once we switch to ImprovedFSDiscriminator. + unsigned LowBitTemp = LowBit; + if (ImprovedFSDiscriminator) + LowBitTemp -= 1; + unsigned BitMaskBefore = getN1Bits(LowBitTemp); // Mask of discriminators including this pass. unsigned BitMaskNow = getN1Bits(HighBit); // Mask of discriminators for bits specific to this pass. 
@@ -92,9 +119,28 @@ unsigned NumNewD = 0; LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: " - << MF.getFunction().getName() << "\n"); + << MF.getFunction().getName() << " Highbit=" << HighBit + << "\n"); + + auto BBSize = [](const MachineBasicBlock &BB) { + int Size = 0; + for (const MachineInstr &I : BB) { + if (ImprovedFSDiscriminator && I.isMetaInstruction()) + continue; + Size++; + } + return Size; + }; + for (MachineBasicBlock &BB : MF) { + uint64_t BBSizeHash = 0; + if (ImprovedFSDiscriminator) + BBSizeHash = MD5Hash(std::to_string(BBSize(BB))); + for (MachineInstr &I : BB) { + if (ImprovedFSDiscriminator && I.isMetaInstruction()) { + continue; + } const DILocation *DIL = I.getDebugLoc().get(); if (!DIL) continue; @@ -102,7 +148,12 @@ if (LineNo == 0) continue; unsigned Discriminator = DIL->getDiscriminator(); - LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator}; + uint64_t CallStackHashVal = 0; + if (ImprovedFSDiscriminator) + CallStackHashVal = getCallStackHash(DIL); + + LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator, + CallStackHashVal}; auto &BBMap = LDBM[LD]; auto R = BBMap.insert(&BB); if (BBMap.size() == 1) @@ -111,7 +162,10 @@ unsigned DiscriminatorCurrPass; DiscriminatorCurrPass = R.second ? 
++LDCM[LD] : LDCM[LD]; DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit; - DiscriminatorCurrPass += getCallStackHash(BB, I, DIL); + if (ImprovedFSDiscriminator) + DiscriminatorCurrPass += BBSizeHash; + else + DiscriminatorCurrPass += getCallStackHashV0(BB, I, DIL); DiscriminatorCurrPass &= BitMaskThisPass; unsigned NewD = Discriminator | DiscriminatorCurrPass; const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD); Index: llvm/lib/CodeGen/MIRSampleProfile.cpp =================================================================== --- llvm/lib/CodeGen/MIRSampleProfile.cpp +++ llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -58,6 +58,7 @@ cl::init(false), cl::desc("View BFI after MIR loader")); +extern cl::opt<bool> ImprovedFSDiscriminator; char MIRProfileLoaderPass::ID = 0; INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE, @@ -107,6 +108,9 @@ using PredRangeT = iterator_range::iterator>; using SuccRangeT = iterator_range::iterator>; static Function &getFunction(MachineFunction &F) { return F.getFunction(); } + static bool shouldIgnoreInst(const MachineInstr &Inst) { + return ImprovedFSDiscriminator && Inst.isMetaInstruction(); + } static const MachineBasicBlock *getEntryBB(const MachineFunction *F) { return GraphTraits::getEntryNode(F); } Index: llvm/test/CodeGen/X86/Inputs/fsloader_v1.afdo =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/Inputs/fsloader_v1.afdo @@ -0,0 +1,35 @@ +work:42380966:1346190 + 1: 1246499 + 5: 1246499 +foo:28798256:4267 + 0: 4267 + 2.1: 255999 + 4: 264627 bar:250018 + 4.1792: 269485 bar:278102 + 4.6656: 280297 bar:280933 + 4.6912: 278916 bar:267752 + 5: 264627 + 5.1792: 269485 + 5.6656: 260670 + 5.6912: 278916 + 6: 11541 + 6.6912: 278916 work:284547 + 6.7168: 260670 work:249428 + 6.7424: 11541 + 7: 272442 + 7.6912: 283590 + 7.7168: 234082 + 7.7424: 279149 + 8: 11541 + 8.14848: 283590 work:305061 + 8.15104: 279149 work:281368 + 8.15360: 234082 work:225786 + 10: 4050 
+bar:9504180:1076805 + 2: 1056020 + 3: 1056020 +main:20360:0 + 0: 0 + 2.1: 4045 + 3: 4156 foo:4267 + 5: 0 Index: llvm/test/CodeGen/X86/fsafdo_test1.ll =================================================================== --- llvm/test/CodeGen/X86/fsafdo_test1.ll +++ llvm/test/CodeGen/X86/fsafdo_test1.ll @@ -1,10 +1,13 @@ -; RUN: llc -enable-fs-discriminator < %s | FileCheck %s +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s --check-prefixes=V0,CHECK +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s --check-prefixes=V1,CHECK ; ; Check that fs-afdo discriminators are generated. ; CHECK: .loc 1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3 ; ChECK: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5 -; CHECK: .loc 1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5 -; CHECK: .loc 1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3 +; V0: .loc 1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5 +; V0: .loc 1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3 +; V1: .loc 1 9 5 is_stmt 0 discriminator 2818 # foo.c:9:5 +; V1: .loc 1 7 3 is_stmt 1 discriminator 2818 # foo.c:7:3 ; Check that variable __llvm_fs_discriminator__ is generated. 
; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ ; CHECK: .section .rodata,"a",@progbits Index: llvm/test/CodeGen/X86/fsafdo_test2.ll =================================================================== --- llvm/test/CodeGen/X86/fsafdo_test2.ll +++ llvm/test/CodeGen/X86/fsafdo_test2.ll @@ -1,7 +1,10 @@ ; REQUIRES: asserts -; RUN: llc -enable-fs-discriminator < %s | FileCheck %s -; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo -; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefix=LOADER +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s --check-prefixes=V0,V01 +; RUN: llvm-profdata merge --sample -profile-isfs -o %t0.afdo %S/Inputs/fsloader.afdo +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false -fs-profile-file=%t0.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=LOADERV0,LOADER +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s --check-prefixes=V1,V01 +; RUN: llvm-profdata merge --sample -profile-isfs -o %t1.afdo %S/Inputs/fsloader_v1.afdo +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -fs-profile-file=%t1.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=LOADERV1,LOADER ; ;; ;; C source code for the test (compiler at -O3): @@ -41,18 +44,21 @@ ;; } ;; ;; Check that fs-afdo discriminators are generated. 
-; CHECK: .loc 1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9 -; CHECK: .loc 1 23 9 is_stmt 0 discriminator 3585 # unroll.c:23:9 -; CHECK: .loc 1 23 9 is_stmt 0 discriminator 8705 # unroll.c:23:9 -; CHECK: .loc 1 23 9 is_stmt 0 discriminator 4097 # unroll.c:23:9 +; V01: .loc 1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9 +; V0: .loc 1 23 9 is_stmt 0 discriminator 3585 # unroll.c:23:9 +; V0: .loc 1 23 9 is_stmt 0 discriminator 8705 # unroll.c:23:9 +; V0: .loc 1 23 9 is_stmt 0 discriminator 4097 # unroll.c:23:9 +; V1: .loc 1 23 9 is_stmt 0 discriminator 6913 # unroll.c:23:9 +; V1: .loc 1 23 9 is_stmt 0 discriminator 7169 # unroll.c:23:9 +; V1: .loc 1 23 9 is_stmt 0 discriminator 7425 # unroll.c:23:9 ;; ;; Check that variable __llvm_fs_discriminator__ is generated. -; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ -; CHECK: .section .rodata,"a",@progbits -; CHECK: .weak __llvm_fs_discriminator__ -; CHECK: __llvm_fs_discriminator__: -; CHECK: .byte 1 -; CHECK: .size __llvm_fs_discriminator__, 1 +; V01: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ +; V01: .section .rodata,"a",@progbits +; V01: .weak __llvm_fs_discriminator__ +; V01: __llvm_fs_discriminator__: +; V01: .byte 1 +; V01: .size __llvm_fs_discriminator__, 1 ;; Check that new branch probs are generated. 
; LOADER: Set branch fs prob: MBB (1 -> 3): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% @@ -63,16 +69,19 @@ ; LOADER: Set branch fs prob: MBB (5 -> 7): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7de3eed2 / 0x80000000 = 98.35% ; LOADER: Set branch fs prob: MBB (8 -> 10): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x00000000 / 0x80000000 = 0.00% ; LOADER: Set branch fs prob: MBB (8 -> 9): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x80000000 / 0x80000000 = 100.00% -; LOADER: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% -; LOADER: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07% +; LOADERV0: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% +; LOADERV1: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08% +; LOADERV0: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07% +; LOADERV1: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92% ; LOADER: Set branch fs prob: MBB (12 -> 14): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x02012507 / 0x80000000 = 1.57% ; LOADER: Set branch fs prob: MBB (12 -> 13): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x7dfedaf9 / 0x80000000 = 98.43% -; LOADER: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08% -; LOADER: Set branch fs prob: MBB 
(14 -> 15): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92% +; LOADERV0: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08% +; LOADERV1: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% +; LOADERV0: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92% +; LOADERV1: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07% ; LOADER: Set branch fs prob: MBB (16 -> 18): unroll.c:24:11-->unroll.c:19:3 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x16588166 / 0x80000000 = 17.46% ; LOADER: Set branch fs prob: MBB (16 -> 17): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x69a77e9a / 0x80000000 = 82.54% - target triple = "x86_64-unknown-linux-gnu" @sum = dso_local local_unnamed_addr global i32 0, align 4 Index: llvm/test/CodeGen/X86/fsafdo_test3.ll =================================================================== --- llvm/test/CodeGen/X86/fsafdo_test3.ll +++ llvm/test/CodeGen/X86/fsafdo_test3.ll @@ -1,5 +1,7 @@ -; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo -; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false -print-machine-bfi -print-bfi-func-name=foo -print-before=fs-profile-loader -stop-after=fs-profile-loader < %s 2>&1 | FileCheck %s --check-prefix=BFI +; RUN: llvm-profdata merge --sample -profile-isfs -o %t0.afdo %S/Inputs/fsloader.afdo +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false -fs-profile-file=%t0.afdo -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false -print-machine-bfi -print-bfi-func-name=foo 
-print-before=fs-profile-loader -stop-after=fs-profile-loader < %s 2>&1 | FileCheck %s --check-prefixes=BFI,BFIV0 +; RUN: llvm-profdata merge --sample -profile-isfs -o %t1.afdo %S/Inputs/fsloader_v1.afdo +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -fs-profile-file=%t1.afdo -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false -print-machine-bfi -print-bfi-func-name=foo -print-before=fs-profile-loader -stop-after=fs-profile-loader < %s 2>&1 | FileCheck %s --check-prefixes=BFI,BFIV1 ; ;; ;; C source code for the test (compiler at -O3): @@ -63,7 +65,7 @@ ; ; BFI: # *** IR Dump Before SampleFDO loader in MIR (fs-profile-loader) ***: ; BFI: # End machine code for function foo. -; +; BFI-EMPTY: ; BFI: block-frequency-info: foo ; BFI: - BB0[entry]: float = 1.0, int = 8, count = 4268 ; BFI: - BB1[for.cond1.preheader]: float = 66.446, int = 531, count = 283289 @@ -75,11 +77,13 @@ ; BFI: - BB7[if.end.1]: float = 66.446, int = 531, count = 283289 ; BFI: - BB8[if.then7.1]: float = 66.446, int = 531, count = 283289 ; BFI: - BB9[if.end9.1]: float = 66.446, int = 531, count = 283289 -; BFI: - BB10[if.then.2]: float = 2.7041, int = 21, count = 11204 +; BFIV0: - BB10[if.then.2]: float = 2.7041, int = 21, count = 11204 +; BFIV1: - BB10[if.then.2]: float = 61.075, int = 488, count = 260348 ; BFI: - BB11[if.end.2]: float = 66.446, int = 531, count = 283289 ; BFI: - BB12[if.then7.2]: float = 65.405, int = 523, count = 279021 ; BFI: - BB13[if.end9.2]: float = 66.446, int = 531, count = 283289 -; BFI: - BB14[if.then.3]: float = 61.075, int = 488, count = 260348 +; BFIV0: - BB14[if.then.3]: float = 61.075, int = 488, count = 260348 +; BFIV1: - BB14[if.then.3]: float = 2.7041, int = 21, count = 11204 ; BFI: - BB15[if.end.3]: float = 66.446, int = 531, count = 283289 ; BFI: - BB16[if.then7.3]: float = 54.846, int = 438, count = 233673 ; BFI: - BB17[if.end9.3]: float = 66.446, int = 531, count = 283289 Index: llvm/test/CodeGen/X86/fsafdo_test4.ll 
=================================================================== --- llvm/test/CodeGen/X86/fsafdo_test4.ll +++ llvm/test/CodeGen/X86/fsafdo_test4.ll @@ -1,10 +1,11 @@ -; RUN: llc -enable-fs-discriminator < %s | FileCheck %s +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s +; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s ; ; Check that fs-afdo discriminators are NOT generated, as debugInfoForProfiling is false (not set). ; CHECK: .loc 1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3 ; CHECK: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5 -; CHECK-NOT: .loc 1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5 -; CHECK-NOT: .loc 1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3 +; CHECK-NOT: .loc 1 9 5 is_stmt 0 discriminator +; CHECK-NOT: .loc 1 7 3 is_stmt 1 discriminator ; Check that variable __llvm_fs_discriminator__ is NOT generated. ; CHECK-NOT: __llvm_fs_discriminator__: