Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -48,13 +48,20 @@ COND_S = 15, LAST_VALID_COND = COND_S, - // Artificial condition codes. These are used by AnalyzeBranch + // Artificial condition codes. This is used by AnalyzeBranch // to indicate a block terminated with two conditional branches to // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE, // which can't be represented on x86 with a single condition. These // are never used in MachineInstrs. COND_NE_OR_P, - COND_NP_OR_E, + + // Artificial condition codes. This is used to represent the negation of + // above condition. The only scenario we need this condition is when we try to + // reverse the above condition in order to remove redundant unconditional + // jumps. Note that both true and false bodies need to be avaiable in order to + // correctly synthesize instructions for them. These are never used in + // MachineInstrs. + COND_E_AND_NP, // negate of COND_NE_OR_P COND_INVALID }; Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -3805,6 +3805,8 @@ case X86::COND_NP: return X86::COND_P; case X86::COND_O: return X86::COND_NO; case X86::COND_NO: return X86::COND_O; + case X86::COND_NE_OR_P: return X86::COND_E_AND_NP; + case X86::COND_E_AND_NP: return X86::COND_NE_OR_P; } } @@ -4024,30 +4026,43 @@ assert(Cond.size() == 1); assert(TBB); - // Only handle the case where all conditional branches branch to the same - // destination. - if (TBB != I->getOperand(0).getMBB()) - return true; - // If the conditions are the same, we can leave them alone. X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); - if (OldBranchCode == BranchCode) + auto NewTBB = I->getOperand(0).getMBB(); + if (OldBranchCode == BranchCode && TBB == NewTBB) continue; // If they differ, see if they fit one of the known patterns. Theoretically, // we could handle more patterns here, but we shouldn't expect to see them // if instruction selection has done a reasonable job. - if ((OldBranchCode == X86::COND_NP && - BranchCode == X86::COND_E) || - (OldBranchCode == X86::COND_E && - BranchCode == X86::COND_NP)) - BranchCode = X86::COND_NP_OR_E; - else if ((OldBranchCode == X86::COND_P && - BranchCode == X86::COND_NE) || - (OldBranchCode == X86::COND_NE && - BranchCode == X86::COND_P)) + if (TBB == NewTBB && + ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) || + (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) { BranchCode = X86::COND_NE_OR_P; - else + } else if (((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) || + (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) && + (FBB == nullptr || NewTBB == FBB)) { + // X86::COND_E_AND_NP usually has two different branch destinations. + // + // JP B1 + // JE B2 + // JMP B1 + // B1: + // B2: + // + // Here this condition branches to B2 only if NP && E. It has another + // equivalent form: + // + // JNE B1 + // JNP B2 + // JMP B1 + // B1: + // B2: + // + // Similarly it branches to B2 only if E && NP. That is why this condition + // is named with COND_E_AND_NP. + BranchCode = X86::COND_E_AND_NP; + } else return true; // Update the MachineOperand. @@ -4156,6 +4171,13 @@ return Count; } +static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB) { + auto I = std::next(MBB->getIterator()); + if (I == MBB->getParent()->end()) + return nullptr; + return &*I; +} + unsigned X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, @@ -4172,17 +4194,13 @@ return 1; } + // If FBB is null, it is implied to be a fall-through block. + bool FallThru = FBB == nullptr; + // Conditional branch. unsigned Count = 0; X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); switch (CC) { - case X86::COND_NP_OR_E: - // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB); - ++Count; - BuildMI(&MBB, DL, get(X86::JE_1)).addMBB(TBB); - ++Count; - break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB); @@ -4190,13 +4208,28 @@ BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB); ++Count; break; + case X86::COND_E_AND_NP: + // Use the next block of MBB as FBB if it is null. + if (FBB == nullptr) { + FBB = getFallThroughMBB(&MBB); + assert(FBB && "MBB cannot be the last block in function when the false " + "body is a fall-through."); + } + // Synthesize COND_E_AND_NP with two branches. + BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB); + ++Count; + BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB); + ++Count; + if (getFallThroughMBB(&MBB) != FBB) + BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB); + break; default: { unsigned Opc = GetCondBranchFromCond(CC); BuildMI(&MBB, DL, get(Opc)).addMBB(TBB); ++Count; } } - if (FBB) { + if (!FallThru) { // Two-way Conditional branch. Insert the second branch. BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB); ++Count; @@ -6717,8 +6750,6 @@ ReverseBranchCondition(SmallVectorImpl &Cond) const { assert(Cond.size() == 1 && "Invalid X86 branch condition!"); X86::CondCode CC = static_cast(Cond[0].getImm()); - if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) - return true; Cond[0].setImm(GetOppositeBranchCondition(CC)); return false; } Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -463,26 +463,23 @@ } define void @fpcmp_unanalyzable_branch(i1 %cond) { -; This function's CFG contains an unanalyzable branch that is likely to be -; split due to having a different high-probability predecessor. +; This function's CFG contains an once-unanalyzable branch (une on floating +; points). As now it becomes analyzable, we should get best layout in which each +; edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end' is +; fall-through. ; CHECK: fpcmp_unanalyzable_branch ; CHECK: %entry +; CHECK: %entry.if.then_crit_edge +; CHECK: %if.then +; CHECK: %if.end ; CHECK: %exit -; CHECK-NOT: %if.then -; CHECK-NOT: %if.end -; CHECK-NOT: jne -; CHECK-NOT: jnp ; CHECK: jne ; CHECK-NEXT: jnp -; CHECK-NEXT: %if.then entry: ; Note that this branch must be strongly biased toward ; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for -; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that -; chain which would violate the unanalyzable branch in 'exit', but we won't even -; try this trick unless 'if.then' is believed to almost always be reached from -; 'entry.if.then_crit_edge'. +; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'. br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1 entry.if.then_crit_edge: @@ -494,7 +491,7 @@ exit: %cmp.i = fcmp une double 0.000000e+00, undef - br i1 %cmp.i, label %if.then, label %if.end + br i1 %cmp.i, label %if.then, label %if.end, !prof !3 if.then: %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ] @@ -507,6 +504,7 @@ } !1 = !{!"branch_weights", i32 1000, i32 1} +!3 = !{!"branch_weights", i32 1, i32 1000} declare i32 @f() declare i32 @g() @@ -665,11 +663,14 @@ ; Ensure that we can handle unanalyzable branches where the destination block ; gets selected as the optimal successor to merge. ; +; This branch is now analyzable and hence the destination block becomes the +; hotter one. The right order is entry->bar->exit->foo. +; ; CHECK: unanalyzable_branch_to_best_succ ; CHECK: %entry -; CHECK: %foo ; CHECK: %bar ; CHECK: %exit +; CHECK: %foo entry: ; Bias this branch toward bar to ensure we form that chain. Index: test/CodeGen/X86/fast-isel-cmp-branch2.ll =================================================================== --- test/CodeGen/X86/fast-isel-cmp-branch2.ll +++ test/CodeGen/X86/fast-isel-cmp-branch2.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: fcmp_oeq ; CHECK: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jnp {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_1}} %1 = fcmp oeq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -162,8 +162,7 @@ ; CHECK-LABEL: fcmp_une ; CHECK: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jp {{LBB.+_2}} -; CHECK-NEXT: jmp {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_1}} %1 = fcmp une float %x, %y br i1 %1, label %bb1, label %bb2 bb2: Index: test/CodeGen/X86/fast-isel-cmp-branch3.ll =================================================================== --- test/CodeGen/X86/fast-isel-cmp-branch3.ll +++ test/CodeGen/X86/fast-isel-cmp-branch3.ll @@ -17,7 +17,7 @@ ; CHECK: xorps %xmm1, %xmm1 ; CHECK-NEXT: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jnp {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_1}} %1 = fcmp oeq float %x, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb2: @@ -338,8 +338,7 @@ ; CHECK: xorps %xmm1, %xmm1 ; CHECK-NEXT: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jp {{LBB.+_2}} -; CHECK-NEXT: jmp {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_1}} %1 = fcmp une float %x, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb2: Index: test/CodeGen/X86/fp-une-cmp.ll =================================================================== --- test/CodeGen/X86/fp-une-cmp.ll +++ test/CodeGen/X86/fp-une-cmp.ll @@ -19,12 +19,12 @@ ; addsd ... ; LBB0_2: -; CHECK: func +define float @func1(float %x, float %y) nounwind readnone optsize ssp { +; CHECK: func1 ; CHECK: jne [[LABEL:.*]] ; CHECK-NEXT: jp [[LABEL]] ; CHECK-NOT: jmp - -define float @func(float %x, float %y) nounwind readnone optsize ssp { +; entry: %0 = fpext float %x to double %1 = fpext float %y to double @@ -41,3 +41,33 @@ %.0 = fptrunc double %.0.in to float ret float %.0 } + +define float @func2(float %x, float %y) nounwind readnone optsize ssp { +; +; With branch weights indicated, bb2 will be placed ahead of bb1. +; +; CHECK: func2 +; CHECK: jne [[LABEL:.*]] +; CHECK-NEXT: jp [[LABEL]] +; CHECK: %bb2 +; CHECK: %bb1 +; CHECK: jmp +; +entry: + %0 = fpext float %x to double + %1 = fpext float %y to double + %2 = fmul double %0, %1 + %3 = fcmp une double %2, 0.000000e+00 + br i1 %3, label %bb1, label %bb2, !prof !1 + +bb1: + %4 = fadd double %2, -1.000000e+00 + br label %bb2 + +bb2: + %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ] + %.0 = fptrunc double %.0.in to float + ret float %.0 +} + +!1 = !{!"branch_weights", i32 1, i32 1000} Index: test/CodeGen/X86/x86-analyze-branch-jne-jp.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/x86-analyze-branch-jne-jp.ll @@ -0,0 +1,21 @@ +; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK + +; Test if the negation of the non-equality check between floating points are +; translated to jnp followed by jne. + +; CHECK: jne +; CHECK-NEXT: jnp +define void @foo(float %f) { +entry: + %cmp = fcmp une float %f, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @a() + br label %if.end + +if.end: + ret void +} + +declare void @a()