diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -405,6 +405,7 @@ bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); bool fixupDbgValue(Instruction *I); bool placeDbgValues(Function &F); + bool placePseudoProbes(Function &F); bool canFormExtLd(const SmallVectorImpl &MovedExts, LoadInst *&LI, Instruction *&Inst, bool HasPromoted); bool tryToPromoteExts(TypePromotionTransaction &TPT, @@ -611,6 +612,7 @@ // Do this last to clean up use-before-def scenarios introduced by other // preparatory transforms. EverMadeChange |= placeDbgValues(F); + EverMadeChange |= placePseudoProbes(F); #ifndef NDEBUG if (VerifyBFIUpdates) @@ -7989,6 +7991,28 @@ return MadeChange; } +// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered +// probes can be chained dependencies of other regular DAG nodes and block DAG +// combine optimizations. +bool CodeGenPrepare::placePseudoProbes(Function &F) { + bool MadeChange = false; + for (auto &Block : F) { + // Move the rest probes to the beginning of the block. + auto FirstInst = Block.getFirstInsertionPt(); + while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst()) + ++FirstInst; + BasicBlock::iterator I(FirstInst); + I++; + while (I != Block.end()) { + if (auto *II = dyn_cast(I++)) { + II->moveBefore(&*FirstInst); + MadeChange = true; + } + } + } + return MadeChange; +} + /// Scale down both weights to fit into uint32_t. static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-selectionDAG.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-selectionDAG.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-selectionDAG.ll @@ -0,0 +1,39 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %s -codegenprepare -mtriple=x86_64 -S -o %t +; RUN: FileCheck %s < %t --check-prefix=IR +; RUN: llc -mtriple=x86_64-- -stop-after=finalize-isel %t -o - | FileCheck %s --check-prefix=MIR + +define internal i32 @arc_compare() { +entry: + %0 = load i64, i64* undef, align 8 + br i1 undef, label %return, label %if.end + +if.end: ; preds = %entry +;; Check pseudo probes are next to each other at the beginning of this block. +; IR-label: if.end +; IR: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1) +; IR: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 3, i32 0, i64 -1) + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1) + %1 = load i16, i16* undef, align 8 + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 3, i32 0, i64 -1) + %2 = and i16 %1, 16 + %3 = icmp eq i16 %2, 0 +;; Check the load-and-cmp sequence is fold into a test instruction. +; MIR-label: bb.1.if.end +; MIR: %[[#REG:]]:gr64 = IMPLICIT_DEF +; MIR: TEST8mi killed %[[#REG]], 1, $noreg, 0, $noreg, 16 +; MIR: JCC_1 + br i1 %3, label %return, label %if.end6 + +if.end6: ; preds = %if.end + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 5, i32 0, i64 -1) + br label %return + +return: ; preds = %if.end6, %if.end, %entry + ret i32 undef +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn }