Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -69,6 +69,15 @@ COND_NE_OR_P, COND_NP_OR_E, + // Artificial condition codes. These are used to represent the "negation" of + // above two conditions. Here "negation" is not in terms of logic. The only + // scenario we need these two conditions is when we try to reverse above two + // conditions in order to remove redundant unconditional jumps. Note that both + // true and false bodies need to be available in order to correctly synthesize + // instructions for them. These are never used in MachineInstrs. + COND_NEG_NE_OR_P, + COND_NEG_NP_OR_E, + COND_INVALID }; Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -3335,6 +3335,10 @@ case X86::COND_NP: return X86::COND_P; case X86::COND_O: return X86::COND_NO; case X86::COND_NO: return X86::COND_O; + case X86::COND_NE_OR_P: return X86::COND_NEG_NE_OR_P; + case X86::COND_NP_OR_E: return X86::COND_NEG_NP_OR_E; + case X86::COND_NEG_NE_OR_P: return X86::COND_NE_OR_P; + case X86::COND_NEG_NP_OR_E: return X86::COND_NP_OR_E; } } @@ -3503,44 +3507,6 @@ // Working from the bottom, handle the first conditional branch. if (Cond.empty()) { - MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); - if (AllowModify && UnCondBrIter != MBB.end() && - MBB.isLayoutSuccessor(TargetBB)) { - // If we can modify the code and it ends in something like: - // - // jCC L1 - // jmp L2 - // L1: - // ... - // L2: - // - // Then we can change this to: - // - // jnCC L2 - // L1: - // ... - // L2: - // - // Which is a bit more efficient. - // We conditionally jump to the fall-through block. 
- BranchCode = GetOppositeBranchCondition(BranchCode); - unsigned JNCC = GetCondBranchFromCond(BranchCode); - MachineBasicBlock::iterator OldInst = I; - - BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC)) - .addMBB(UnCondBrIter->getOperand(0).getMBB()); - BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1)) - .addMBB(TargetBB); - - OldInst->eraseFromParent(); - UnCondBrIter->eraseFromParent(); - - // Restart the analysis. - UnCondBrIter = MBB.end(); - I = MBB.end(); - continue; - } - FBB = TBB; TBB = I->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); @@ -3554,11 +3520,6 @@ assert(Cond.size() == 1); assert(TBB); - // Only handle the case where all conditional branches branch to the same - // destination. - if (TBB != I->getOperand(0).getMBB()) - return true; - // If the conditions are the same, we can leave them alone. X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); if (OldBranchCode == BranchCode) @@ -3577,9 +3538,26 @@ (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P)) BranchCode = X86::COND_NE_OR_P; + else if ((OldBranchCode == X86::COND_NE && + BranchCode == X86::COND_NP) || + (OldBranchCode == X86::COND_P && + BranchCode == X86::COND_E)) + BranchCode = X86::COND_NEG_NP_OR_E; + else if ((OldBranchCode == X86::COND_NP && + BranchCode == X86::COND_NE) || + (OldBranchCode == X86::COND_E && + BranchCode == X86::COND_P)) + BranchCode = X86::COND_NEG_NE_OR_P; else return true; + // Only handle the case where all conditional branches branch to the same + // destination except X86::COND_NEG_NP_OR_E and X86::COND_NEG_NE_OR_P. + if (TBB != I->getOperand(0).getMBB() && + BranchCode != X86::COND_NEG_NP_OR_E && + BranchCode != X86::COND_NEG_NE_OR_P) + return true; + // Update the MachineOperand. Cond[0].setImm(BranchCode); CondBranches.push_back(I); @@ -3702,6 +3680,9 @@ return 1; } + // If FBB is null, it is implied to be a fall-through block. 
+ bool FallThru = FBB == nullptr; + // Conditional branch. unsigned Count = 0; X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); @@ -3720,13 +3701,43 @@ BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB); ++Count; break; + case X86::COND_NEG_NP_OR_E: + // If FBB is null, it is implied to be the next block of MBB. + if (FBB == nullptr) { + MachineFunction::iterator I = &MBB; + FBB = ++I; + assert(I != TBB->getParent()->end() && "MBB cannot be the last block in " + "function when the false body is " + "a fall-through."); + } + // Synthesize NEG_NP_OR_E with two branches. + BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(FBB); + ++Count; + BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB); + ++Count; + break; + case X86::COND_NEG_NE_OR_P: + // If FBB is null, it is implied to be the next block of MBB. + if (FBB == nullptr) { + MachineFunction::iterator I = &MBB; + FBB = ++I; + assert(I != TBB->getParent()->end() && "MBB cannot be the last block in " + "function when the false body is " + "a fall-through."); + } + // Synthesize NEG_NE_OR_P with two branches. + BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB); + ++Count; + BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB); + ++Count; + break; default: { unsigned Opc = GetCondBranchFromCond(CC); BuildMI(&MBB, DL, get(Opc)).addMBB(TBB); ++Count; } } - if (FBB) { + if (!FallThru) { // Two-way Conditional branch. Insert the second branch. 
BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB); ++Count; @@ -6091,8 +6102,6 @@ ReverseBranchCondition(SmallVectorImpl &Cond) const { assert(Cond.size() == 1 && "Invalid X86 branch condition!"); X86::CondCode CC = static_cast(Cond[0].getImm()); - if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) - return true; Cond[0].setImm(GetOppositeBranchCondition(CC)); return false; } Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -463,26 +463,22 @@ } define void @fpcmp_unanalyzable_branch(i1 %cond) { -; This function's CFG contains an unanalyzable branch that is likely to be -; split due to having a different high-probability predecessor. +; This function's CFG contains a once-unanalyzable branch (une on floating +; points). As now it becomes analyzable, we should get the best layout in which each +; edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end' is +; fall-through. ; CHECK: fpcmp_unanalyzable_branch ; CHECK: %entry +; CHECK: %if.then +; CHECK: %if.end ; CHECK: %exit -; CHECK-NOT: %if.then -; CHECK-NOT: %if.end -; CHECK-NOT: jne -; CHECK-NOT: jnp ; CHECK: jne ; CHECK-NEXT: jnp -; CHECK-NEXT: %if.then entry: ; Note that this branch must be strongly biased toward ; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for -; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that -; chain which would violate the unanalyzable branch in 'exit', but we won't even -; try this trick unless 'if.then' is believed to almost always be reached from -; 'entry.if.then_crit_edge'. +; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'. 
br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1 entry.if.then_crit_edge: @@ -665,11 +661,14 @@ ; Ensure that we can handle unanalyzable branches where the destination block ; gets selected as the optimal successor to merge. ; +; This branch is now analyzable and hence the destination block becomes the +; hotter one. +; ; CHECK: unanalyzable_branch_to_best_succ ; CHECK: %entry -; CHECK: %foo ; CHECK: %bar ; CHECK: %exit +; CHECK: %foo entry: ; Bias this branch toward bar to ensure we form that chain. Index: test/CodeGen/X86/fast-isel-cmp-branch2.ll =================================================================== --- test/CodeGen/X86/fast-isel-cmp-branch2.ll +++ test/CodeGen/X86/fast-isel-cmp-branch2.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: fcmp_oeq ; CHECK: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jnp {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_1}} %1 = fcmp oeq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -162,8 +162,7 @@ ; CHECK-LABEL: fcmp_une ; CHECK: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jp {{LBB.+_2}} -; CHECK-NEXT: jmp {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_1}} %1 = fcmp une float %x, %y br i1 %1, label %bb1, label %bb2 bb2: Index: test/CodeGen/X86/fast-isel-cmp-branch3.ll =================================================================== --- test/CodeGen/X86/fast-isel-cmp-branch3.ll +++ test/CodeGen/X86/fast-isel-cmp-branch3.ll @@ -17,7 +17,7 @@ ; CHECK: xorps %xmm1, %xmm1 ; CHECK-NEXT: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jnp {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_1}} %1 = fcmp oeq float %x, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb2: @@ -338,8 +338,7 @@ ; CHECK: xorps %xmm1, %xmm1 ; CHECK-NEXT: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jp {{LBB.+_2}} -; CHECK-NEXT: jmp {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_1}} %1 = fcmp une float %x, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb2: