diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -130,6 +130,12 @@ DenseMap &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); + bool isPotentiallyReachableWithoutPassing(const BasicBlock *BBA, + const BasicBlock *BBB, + const BasicBlock *ExclusionBB, + const DominatorTree *DT, + const LoopInfo *LI); + bool isPRECandidate(MachineInstr *MI); bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); @@ -779,6 +785,22 @@ return true; } +/// Determine whether block 'To' is reachable from 'From', without passing +/// through ExclusionBB block, returning true if uncertain. +bool MachineCSE::isPotentiallyReachableWithoutPassing( + const BasicBlock *From, const BasicBlock *To, const BasicBlock *ExclusionBB, + const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr) { + + SmallVector Worklist; + SmallPtrSet ExclusionSet; + Worklist.push_back(const_cast(From)); + if (ExclusionBB != nullptr) + ExclusionSet.insert(const_cast(ExclusionBB)); + + return llvm::isPotentiallyReachableFromMany( + Worklist, const_cast(To), &ExclusionSet, DT, LI); +} + bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, MachineBasicBlock *MBB) { bool Changed = false; @@ -799,16 +821,17 @@ !DT->properlyDominates(MBB, MBB1) && "MBB cannot properly dominate MBB1 while DFS through dominators tree!"); auto CMBB = DT->findNearestCommonDominator(MBB, MBB1); - if (!CMBB->isLegalToHoistInto()) + if (CMBB == nullptr || !CMBB->isLegalToHoistInto()) continue; - // Two instrs are partial redundant if their basic blocks are reachable - // from one to another but one doesn't dominate another. + // Two equal instrs are partially redundant if their basic blocks are + // reachable from one to another but one doesn't dominate another. if (CMBB != MBB1) { - auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock(); + auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock(), + CBB = CMBB->getBasicBlock(); if (BB != nullptr && BB1 != nullptr && - (isPotentiallyReachable(BB1, BB) || - isPotentiallyReachable(BB, BB1))) { + (MachineCSE::isPotentiallyReachableWithoutPassing(BB1, BB, CBB) || + MachineCSE::isPotentiallyReachableWithoutPassing(BB, BB1, CBB))) { assert(MI->getOperand(0).isDef() && "First operand of instr with one explicit def must be this def"); diff --git a/llvm/test/CodeGen/MIR/AArch64/machine-cse-pre.mir b/llvm/test/CodeGen/MIR/AArch64/machine-cse-pre.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AArch64/machine-cse-pre.mir @@ -0,0 +1,105 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=machine-cse -o - %s | FileCheck %s +# This test ensures that MachineCSE pass doesn't hoist equal instrs SDIVWr. +# Such hoisting makes no sense, since these instrs are actually not partially +# redundant, their basic blocks are reachable from one to another only through +# their common dominator block. + +# CHECK: SDIVWr +# CHECK: SDIVWr + +--- | + + define dso_local void @foo(i32 %segment_index, i32 %num_segments, i16* nocapture %scores, i8* nocapture readonly %arr_segments_ptr) local_unnamed_addr { + entry: + br label %while.body + + while.body: ; preds = %if.end8, %entry + %0 = bitcast i8* %arr_segments_ptr to i16* + %1 = load i16, i16* %0, align 2 + %conv = sext i16 %1 to i32 + %cmp = icmp sgt i16 %1, 0 + br i1 %cmp, label %if.then, label %if.else + + if.then: ; preds = %while.body + %div = sdiv i32 65535, %conv + %mul = shl nsw i32 %div, 3 + br label %if.end8 + + if.else: ; preds = %while.body + %2 = trunc i32 %conv to i16 + %cmp2 = icmp slt i16 %2, 0 + br i1 %cmp2, label %if.then4, label %if.end8 + + if.then4: ; preds = %if.else + %div6 = sdiv i32 65535, %conv + %mul7 = shl nsw i32 %div6, 3 + br label %if.end8 + + if.end8: ; preds = %if.then4, %if.else, %if.then + %ye.0 = phi i32 [ 8, %if.then ], [ %mul7, %if.then4 ], [ 8, %if.else ] + %ys.0 = phi i32 [ %mul, %if.then ], [ 8, %if.then4 ], [ 8, %if.else ] + %add = add nsw i32 %ys.0, %ye.0 + %conv9 = trunc i32 %add to i16 + store i16 %conv9, i16* %scores, align 2 + br label %while.body + } + +... +--- +name: foo +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $x2, $x3 + + %8:gpr64common = COPY $x3 + %7:gpr64common = COPY $x2 + %19:gpr32 = MOVi32imm 65535 + + bb.1.while.body: + successors: %bb.2(0x50000000), %bb.3(0x30000000) + + %9:gpr32common = LDRSHWui %8, 0 :: (load 2 from %ir.0) + %0:gpr32 = COPY %9 + %10:gpr32 = SUBSWri %9, 1, 0, implicit-def $nzcv + Bcc 11, %bb.3, implicit $nzcv + B %bb.2 + + bb.2.if.then: + successors: %bb.5(0x80000000) + + %20:gpr32 = SDIVWr %19, %0 + %21:gpr32 = nsw UBFMWri killed %20, 29, 28 + %22:gpr32 = MOVi32imm 8 + %18:gpr32all = COPY %22 + %1:gpr32all = COPY %21 + B %bb.5 + + bb.3.if.else: + successors: %bb.4(0x30000000), %bb.5(0x50000000) + + %12:gpr32 = MOVi32imm 8 + %11:gpr32all = COPY %12 + TBZW %0, 31, %bb.5 + B %bb.4 + + bb.4.if.then4: + successors: %bb.5(0x80000000) + + %15:gpr32 = SDIVWr %19, %0 + %16:gpr32 = nsw UBFMWri killed %15, 29, 28 + %17:gpr32 = MOVi32imm 8 + %13:gpr32all = COPY %17 + %2:gpr32all = COPY %16 + + bb.5.if.end8: + successors: %bb.1(0x80000000) + + %3:gpr32 = PHI %11, %bb.3, %2, %bb.4, %18, %bb.2 + %4:gpr32 = PHI %11, %bb.3, %13, %bb.4, %1, %bb.2 + %23:gpr32 = nsw ADDWrr %4, %3 + STRHHui killed %23, %7, 0 :: (store 2 into %ir.scores) + B %bb.1 + +...