Index: lib/CodeGen/EarlyIfConversion.cpp =================================================================== --- lib/CodeGen/EarlyIfConversion.cpp +++ lib/CodeGen/EarlyIfConversion.cpp @@ -462,7 +462,7 @@ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); assert(FirstTerm != Head->end() && "No terminators"); DebugLoc HeadDL = FirstTerm->getDebugLoc(); - + // Convert all PHIs to select instructions inserted before FirstTerm. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { PHIInfo &PI = PHIs[i]; @@ -523,7 +523,7 @@ /// void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); - + // Update statistics. if (isTriangle()) ++NumTrianglesConv; @@ -777,6 +777,37 @@ /// bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool Changed = false; + const BasicBlock *BB = MBB->getBasicBlock(); + unsigned PhiInst = 0; + const PHINode *PN; + // Iterate through the basic block and count its PHI nodes. + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + if (dyn_cast(I)) { + PN = dyn_cast(I); + PhiInst++; + } + } + unsigned IfBlock1Inst = 0; + unsigned IfBlock2Inst = 0; + if (PhiInst == 1 && PN) { + BasicBlock *IfBlock1 = PN->getIncomingBlock(0); + BasicBlock *IfBlock2 = PN->getIncomingBlock(1); + // Iterate through the instructions of each incoming block. + for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) { + if(!dyn_cast(I)) + IfBlock1Inst++; + } + for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) { + if(!dyn_cast(I)) + IfBlock2Inst++; + } + } + // If the optimization option is either -Oz or -Os and both incoming blocks + // have fewer than two counted instructions, return false. + if (BB->getParent()->optForSize() && IfBlock1Inst < 2 && IfBlock2Inst < 2) + return false; + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { // If-convert MBB and update analyses. 
invalidateTraces(); Index: lib/CodeGen/TailDuplication.cpp =================================================================== --- lib/CodeGen/TailDuplication.cpp +++ lib/CodeGen/TailDuplication.cpp @@ -72,7 +72,7 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - + auto MBPI = &getAnalysis(); Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false); Index: lib/CodeGen/TailDuplicator.cpp =================================================================== --- lib/CodeGen/TailDuplicator.cpp +++ lib/CodeGen/TailDuplicator.cpp @@ -921,9 +921,25 @@ } appendCopies(PrevBB, CopyInfos, Copies); } else { - TII->removeBranch(*PrevBB); // No PHIs to worry about, just splice the instructions over. - PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); + const BasicBlock *BBM = PrevBB->getBasicBlock(); + unsigned SelGEPInst = 0; + // If BBM is not null, iterate through the basic block and + // count its select and getelementptr instructions. + if (BBM) { + for (BasicBlock::const_iterator I = BBM->begin(), E = BBM->end(); + I != E; ++I) { + if (isa(I) || isa(I)) + SelGEPInst++; + } + } + // If there are no select or getelementptr instructions in the + // current basic block and the optimization level is -Os or -Oz, + // skip the splice of the basic block. + if (!BBM || SelGEPInst != 0 || !BBM->getParent()->optForSize()) { + TII->removeBranch(*PrevBB); + PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); + } } PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -2285,7 +2285,26 @@ // dependence information for this check, but simplifycfg can't keep it up // to date, and this catches most of the cases we care about anyway. 
BasicBlock *BB = PN->getParent(); + int IfBlock1Inst = 0; + int IfBlock2Inst = 0; + BasicBlock *IfBlock1 = PN->getIncomingBlock(0); + BasicBlock *IfBlock2 = PN->getIncomingBlock(1); + + for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) { + if(!dyn_cast(I)) + IfBlock1Inst++; + } + for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) { + if(!dyn_cast(I)) + IfBlock2Inst++; + } + const Function *Fn = BB->getParent(); + // If the optimization option is either -Oz or -Os and both incoming + // blocks have fewer than two counted instructions, return false. + if (Fn->optForSize() && IfBlock1Inst < 2 && IfBlock2Inst < 2) + return false; + if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing)) return false; @@ -2349,8 +2368,6 @@ // to get rid of the control flow, so it's not worth promoting to select // instructions. BasicBlock *DomBlock = nullptr; - BasicBlock *IfBlock1 = PN->getIncomingBlock(0); - BasicBlock *IfBlock2 = PN->getIncomingBlock(1); if (cast(IfBlock1->getTerminator())->isConditional()) { IfBlock1 = nullptr; } else { @@ -2397,13 +2414,13 @@ // Change the PHI node into a select instruction. Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - + Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt); PN->replaceAllUsesWith(Sel); Sel->takeName(PN); PN->eraseFromParent(); } - + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. 
Index: test/CodeGen/AArch64/branches_are_betterthan_csels.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/branches_are_betterthan_csels.ll @@ -0,0 +1,37 @@ +;RUN: llc %s -o - -verify-machineinstrs | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-arm-none-eabi" + +; Function Attrs: minsize norecurse nounwind optsize readnone +;CHECK-LABEL: @test +;CHECK: cbz +;CHECK: orr +;CHECK-NEXT: ret +;CHECK: and +;CHECK-NOT: cmp +;CHECK-NOT: csel +define dso_local i32 @test(i32, i32) local_unnamed_addr #0 { + %3 = icmp eq i32 %1, 0 + br i1 %3, label %6, label %4 + +;