diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -92,6 +92,8 @@ void setProbeDistributionFactor(Instruction &Inst, float Factor); bool moveAndDanglePseudoProbes(BasicBlock *From, Instruction *To); + +bool removeRedundantPseudoProbes(BasicBlock *Block); } // end namespace llvm #endif // LLVM_IR_PSEUDOPROBE_H diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -109,8 +109,9 @@ DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr); /// Try to remove redundant dbg.value instructions from given basic block. -/// Returns true if at least one instruction was removed. -bool RemoveRedundantDbgInstrs(BasicBlock *BB); +/// Returns true if at least one instruction was removed. Remove redundant +/// pseudo ops when RemovePseudoOp is true. +bool RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp = false); /// Replace all uses of an instruction (specified by BI) with a value, then /// remove and delete the original instruction. diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp --- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp +++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -22,7 +22,7 @@ #include "llvm/InitializePasses.h" #include "llvm/MC/MCPseudoProbe.h" #include "llvm/Target/TargetMachine.h" -#include +#include #define DEBUG_TYPE "pseudo-probe-inserter" @@ -112,6 +112,41 @@ } } + // Remove redundant dangling probes. Same dangling probes are redundant + // since they all have the same semantic that is to rely on the counts + // inference too to get reasonable count for the same original block. + // Therefore, there's no need to keep multiple copies of them. + auto Hash = [](const MachineInstr *MI) { + return std::hash()(MI->getOperand(0).getImm()) ^ + std::hash()(MI->getOperand(1).getImm()); + }; + + auto IsEqual = [](const MachineInstr *Left, const MachineInstr *Right) { + return Left->getOperand(0).getImm() == Right->getOperand(0).getImm() && + Left->getOperand(1).getImm() == Right->getOperand(1).getImm() && + Left->getOperand(3).getImm() == Right->getOperand(3).getImm() && + Left->getDebugLoc() == Right->getDebugLoc(); + }; + + SmallVector ToBeRemoved; + std::unordered_set + DanglingProbes(0, Hash, IsEqual); + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.isPseudoProbe()) { + if ((uint32_t)MI.getPseudoProbeAttribute() & + (uint32_t)PseudoProbeAttributes::Dangling) + if (!DanglingProbes.insert(&MI).second) + ToBeRemoved.push_back(&MI); + } + } + } + + for (auto *MI : ToBeRemoved) + MI->eraseFromParent(); + + Changed |= !ToBeRemoved.empty(); return Changed; } diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp --- a/llvm/lib/IR/PseudoProbe.cpp +++ b/llvm/lib/IR/PseudoProbe.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include using namespace llvm; @@ -130,4 +131,40 @@ return !ToBeMoved.empty(); } + +/// Same dangling probes in one blocks are redundant since they all have the +/// same semantic that is to rely on the counts inference too to get reasonable +/// count for the same original block. Therefore, there's no need to keep +/// multiple copies of them. +bool removeRedundantPseudoProbes(BasicBlock *Block) { + + auto Hash = [](const PseudoProbeInst *I) { + return std::hash()(I->getFuncGuid()->getZExtValue()) ^ + std::hash()(I->getIndex()->getZExtValue()); + }; + + auto IsEqual = [](const PseudoProbeInst *Left, const PseudoProbeInst *Right) { + return Left->getFuncGuid() == Right->getFuncGuid() && + Left->getIndex() == Right->getIndex() && + Left->getAttributes() == Right->getAttributes() && + Left->getDebugLoc() == Right->getDebugLoc(); + }; + + SmallVector ToBeRemoved; + std::unordered_set + DanglingProbes(0, Hash, IsEqual); + + for (auto &I : *Block) { + if (auto *II = dyn_cast(&I)) { + if (II->getAttributes()->getZExtValue() & + (uint32_t)PseudoProbeAttributes::Dangling) + if (!DanglingProbes.insert(II).second) + ToBeRemoved.push_back(II); + } + } + + for (auto *I : ToBeRemoved) + I->eraseFromParent(); + return !ToBeRemoved.empty(); +} } // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -433,8 +433,9 @@ // Jump threading may have introduced redundant debug values into BB // which should be removed. + // Remove redundant pseudo probes as well. if (Changed) - RemoveRedundantDbgInstrs(&BB); + RemoveRedundantDbgInstrs(&BB, true); // Stop processing BB if it's the entry or is now deleted. The following // routines attempt to eliminate BB and locating a suitable replacement @@ -467,7 +468,7 @@ // detect and transform nested loops later. !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) && TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) { - RemoveRedundantDbgInstrs(Succ); + RemoveRedundantDbgInstrs(Succ, true); // BB is valid for cleanup here because we passed in DTU. F remains // BB's parent until a DTU->getDomTree() event. LVI->eraseBlock(&BB); diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -432,7 +433,7 @@ return !ToBeRemoved.empty(); } -bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) { +bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp) { bool MadeChanges = false; // By using the "backward scan" strategy before the "forward scan" strategy we // can remove both dbg.value (2) and (3) in a situation like this: @@ -447,6 +448,8 @@ // already is described as having the value V1 at (1). MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB); MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB); + if (RemovePseudoOp) + MadeChanges |= removeRedundantPseudoProbes(BB); if (MadeChanges) LLVM_DEBUG(dbgs() << "Removed redundant dbg instrs from: " diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll @@ -39,6 +39,34 @@ ; MIR-tail: PSEUDO_PROBE [[#GUID:]], 3, 0, 2 ; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0 + +define void @foo2() { +bb: + %tmp = call i32 @f1() + %tmp1 = icmp eq i32 %tmp, 1 + br i1 %tmp1, label %bb5, label %bb8 + +bb2: + %tmp4 = icmp ne i32 %tmp, 1 + switch i1 %tmp4, label %bb2 [ + i1 0, label %bb5 + i1 1, label %bb8 + ] + +bb5: +;; Check the pseudo probe with id 3 only has one copy. +; JT-COUNT-1: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1) +; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1) + %tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ] + br i1 %tmp6, label %bb8, label %bb7 + +bb7: + br label %bb8 + +bb8: + ret void +} + define i32 @test(i32 %a, i32 %b, i32 %c) { ;; Check block bb1 and bb2 are gone, and their probes (probe 2 and 3) are dangling. ; SC-LABEL: @test( diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dedup.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dedup.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dedup.ll @@ -0,0 +1,41 @@ +; REQUIRES: x86_64-linux +; RUN: llc -pseudo-probe-for-profiling %s -filetype=asm -o - | FileCheck %s + +declare i32 @f1() +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +define void @foo2() { +bb: +; CHECK: .pseudoprobe 2494702099028631698 1 0 0 + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 1, i32 0, i64 -1) + %tmp = call i32 @f1() + %tmp1 = icmp eq i32 %tmp, 1 + br i1 %tmp1, label %bb5, label %bb8 + +bb2: +;; Check the pseudo probe with id 2 only has one copy. +; CHECK-COUNT-1: .pseudoprobe 2494702099028631698 2 0 2 +; CHECK-NOT: .pseudoprobe 2494702099028631698 2 0 2 + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1) + %tmp4 = icmp ne i32 %tmp, 1 + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1) + switch i1 %tmp4, label %bb2 [ + i1 0, label %bb5 + i1 1, label %bb8 + ] + +bb5: + %tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ] + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1) + br i1 %tmp6, label %bb8, label %bb7 + +bb7: + br label %bb8 + +bb8: + ret void +} + +!llvm.pseudo_probe_desc = !{!0} + +!0 = !{i64 2494702099028631698, i64 281612674956943, !"foo2", null} \ No newline at end of file