Index: lib/Target/X86/X86CondBrFolding.cpp =================================================================== --- lib/Target/X86/X86CondBrFolding.cpp +++ lib/Target/X86/X86CondBrFolding.cpp @@ -132,6 +132,32 @@ }; } // namespace +// Check if there are PHIs that prevent the merge. +// RootMBB --... --> MBB --> TargetMBB +// \--> FallthoughMBB +// For the PHIs in TargetMBB and FallthroughMBB, if one of its values from +// MBB and there exists another value from RootMBB, we cannot merge MBB with +// RootMBB. Here we are doing in a conservative way: if there are PHIs and +// there is a value from RootMBB, we return true. A more general approach +// would be checking the PHI values and if they are the same, we still can +// merge MBB with RootMBB. +bool unmergeablePHI(const MachineBasicBlock *MBB, + const MachineBasicBlock *RootMBB) { + for (auto MBBI : MBB->successors()) { + MachineInstr *MI = &*MBBI->begin(); + if (!MI->isPHI()) + continue; + // If RootMBB is MBBI's pred, we don't merge. + auto SI = + llvm::find_if(MBBI->successors(), [&RootMBB](const MachineBasicBlock *SI){ + return SI == RootMBB; + }); + if (SI != MBBI->pred_end()) + return true; + } + return false; +} + // Find a valid path that we can reuse the CondCode. // The resulted path (if return true) is stored in BranchPath. // Return value: @@ -237,10 +263,19 @@ MBBInfo->FBB = NewDest; BrMI = &*UncondBrI; } + + auto SI = + llvm::find_if(MBB->successors(), [&NewDest](const MachineBasicBlock *SI) { + return SI == NewDest; + }); + BranchProbability Prob = MBPI->getEdgeProbability(MBB, OrigDest); + if (SI == MBB->succ_end()) { + MBB->addSuccessor(NewDest); + } else + Prob += MBPI->getEdgeProbability(MBB, NewDest); fixPHIsInSucc(NewDest, OrigDest, MBB); + setBranchProb(MBB, NewDest, Prob); BrMI->eraseFromParent(); - MBB->addSuccessor(NewDest); - setBranchProb(MBB, NewDest, MBPI->getEdgeProbability(MBB, OrigDest)); MBB->removeSuccessor(OrigDest); } @@ -329,8 +364,14 @@ BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1)) .addMBB(TargetMBB); - RootMBB->addSuccessor(TargetMBB); + + auto SI = llvm::find_if( + RootMBB->successors(), + [&TargetMBB](const MachineBasicBlock *SI) { return SI == TargetMBB; }); + if (SI == RootMBB->succ_end()) + RootMBB->addSuccessor(TargetMBB); fixPHIsInSucc(TargetMBB, &MBB, RootMBB); + RootMBB->erase(UncondBrI); } else { replaceBrDest(RootMBB, RootMBBInfo->TBB, TargetMBB); @@ -406,6 +447,8 @@ SmallVector BranchPath; if (!findPath(&MBB, BranchPath)) continue; + if (unmergeablePHI(&MBB, BranchPath.back())) + continue; #ifndef NDEBUG LLVM_DEBUG(dbgs() << "Found one path (len=" << BranchPath.size() << "):\n"); @@ -578,5 +621,10 @@ &getAnalysis(); X86CondBrFolding CondBr(TII, MBPI, MF); - return CondBr.optimize(); + bool Ret = CondBr.optimize(); +#ifdef EXPENSIVE_CHECKS + if (Ret) + MF.verify(); +#endif + return Ret; } Index: test/CodeGen/X86/pr39658.ll =================================================================== --- test/CodeGen/X86/pr39658.ll +++ test/CodeGen/X86/pr39658.ll @@ -0,0 +1,99 @@ +; RUN: llc -x86-condbr-folding=true -mtriple=x86_64-linux-gnu -mcpu=sandybridge %s -o - -verify-machineinstrs | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@s = common dso_local local_unnamed_addr global i32 0, align 4 + +define i32 @case1(i32 %n, i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5) { +entry: + %add = add nsw i32 %v1, 100 + %cmp = icmp sgt i32 %n, 43 + br i1 %cmp, label %if.end11, label %if.else +; CHECK_LABEL: case1 +; CHECK: jg +; CHECK-NOT: jge + +if.else: + %cmp1 = icmp eq i32 %n, 43 + br i1 %cmp1, label %if.end11, label %if.else3 + +if.else3: + %cmp4 = icmp sgt i32 %n, 41 + br i1 %cmp4, label %if.end11, label %if.else6 + +if.else6: + %cmp7 = icmp eq i32 %n, 41 + br i1 %cmp7, label %if.then8, label %if.end11 + +if.then8: + %call = tail call i32 (...) @bar1() + br label %if.end11 + +if.end11: + %v.0 = phi i32 [ %add, %if.then8 ], [ %add, %if.else6 ], [ %v2, %entry ], [ %v3, %if.else ], [ %v4, %if.else3 ] + %call12 = tail call i32 @bar3(i32 %v.0) + ret i32 %call12 +} + +define i32 @case2(i32 %n, i32 %v1, i32 %v3, i32 %v4, i32 %v5) { +entry: + %add = add nsw i32 %v1, 100 + %cmp = icmp sgt i32 %n, 43 + br i1 %cmp, label %if.end11, label %if.else +; CHECK_LABEL: case1 +; CHECK: jg +; CHECK-NOT: jge + +if.else: + %cmp1 = icmp eq i32 %n, 43 + br i1 %cmp1, label %if.end11, label %if.else3 + +if.else3: + %cmp4 = icmp sgt i32 %n, 41 + br i1 %cmp4, label %if.end11, label %if.else6 + +if.else6: + %cmp7 = icmp eq i32 %n, 41 + br i1 %cmp7, label %if.then8, label %if.end11 + +if.then8: + %call = tail call i32 (...) @bar1() + br label %if.end11 + +if.end11: + %v.0 = phi i32 [ %add, %if.then8 ], [ %add, %if.else6 ], [ %add, %entry ], [ %v3, %if.else ], [ %v4, %if.else3 ] + %call12 = tail call i32 @bar3(i32 %v.0) + ret i32 %call12 +} + +define i32 @case3(i32 %n, i32 %v1, i32 %v2, i32 %v3) { +entry: + %cmp = icmp sgt i32 %n, 41 + br i1 %cmp, label %if.then, label %if.else +; CHECK_LABEL: case2 +; CHECK: jl +; CHECK: jle + +if.then: + %call = tail call i32 (...) @bar1() + br label %if.end4 + +if.else: + %cmp1 = icmp eq i32 %n, 41 + br i1 %cmp1, label %if.then2, label %if.end4 + +if.then2: + %call3 = tail call i32 (...) @bar2() + br label %if.end4 + +if.end4: + %v.0 = phi i32 [ %v2, %if.then ], [ %v3, %if.then2 ], [ %v1, %if.else ] + %call5 = tail call i32 @bar3(i32 %v.0) + %0 = load i32, i32* @s, align 4 + ret i32 %0 +} + +declare i32 @bar1(...) +declare i32 @bar2(...) +declare i32 @bar3(i32)