Index: lib/Target/AArch64/AArch64.h
===================================================================
--- lib/Target/AArch64/AArch64.h
+++ lib/Target/AArch64/AArch64.h
@@ -36,6 +36,7 @@
 FunctionPass *createAArch64ExpandPseudoPass();
 FunctionPass *createAArch64LoadStoreOptimizationPass();
 ModulePass *createAArch64PromoteConstantPass();
+FunctionPass *createAArch64ConditionOptimizerPass();
 FunctionPass *createAArch64AddressTypePromotionPass();
 FunctionPass *createAArch64A57FPLoadBalancing();
 /// \brief Creates an ARM-specific Target Transformation Info pass.
Index: lib/Target/AArch64/AArch64ConditionOptimizer.cpp
===================================================================
--- /dev/null
+++ lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -0,0 +1,405 @@
+//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to make consecutive compares of values use the same
+// operands to allow the CSE pass to remove duplicated instructions. For this
+// it analyzes branches and adjusts comparisons with immediate values by
+// converting:
+//  * GE -> GT
+//  * GT -> GE
+//  * LT -> LE
+//  * LE -> LT
+// and adjusting the immediate values appropriately. In essence, it corrects
+// two immediate values towards each other until they are equal.
+//
+// Consider the following example in C:
+//
+//   if ((a < 5 && ...) || (a > 5 && ...)) {
+//        ~~~~~             ~~~~~
+//          ^                 ^
+//          x                 y
+//
+// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates
+// to "false", "y" can just check the flags set by the first comparison. As a
+// result of the canonicalization employed by
+// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific
+// code, the assembly ends up in a form that is not CSE-friendly:
+//
+//     ...
+//     cmp      w8, #4
+//     b.gt     .LBB0_3
+//     ...
+//   .LBB0_3:
+//     cmp      w8, #6
+//     b.lt     .LBB0_6
+//     ...
+//
+// The same assembly after the pass:
+//
+//     ...
+//     cmp      w8, #5
+//     b.ge     .LBB0_3
+//     ...
+//   .LBB0_3:
+//     cmp      w8, #5     // <-- CSE pass removes this instruction
+//     b.le     .LBB0_6
+//     ...
+//
+// Currently only SUBS and ADDS followed by b.?? are supported.
+//
+// TODO: maybe handle TBNZ/TBZ the same way as CMP when they are used instead
+//       of CMP for checks like "a < 0"
+// TODO: handle other conditional instructions (e.g. CSET)
+// TODO: allow second branching to be anything if it doesn't require adjusting
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cstdlib>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-condopt"
+
+STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
+
+namespace {
+class AArch64ConditionOptimizer : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineDominatorTree *DomTree;
+
+public:
+  // Stores the immediate, the compare instruction opcode and the branch
+  // condition (in this order) of the adjusted comparison.
+  typedef std::tuple<int, int, AArch64CC::CondCode> CmpInfo;
+
+  static char ID;
+  AArch64ConditionOptimizer() : MachineFunctionPass(ID) {}
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  MachineInstr *findSuitableCompare(MachineBasicBlock *MBB);
+  CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
+  void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info);
+  bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
+                int ToImm);
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  const char *getPassName() const override {
+    return "AArch64 Condition Optimizer";
+  }
+};
+} // end anonymous namespace
+
+char AArch64ConditionOptimizer::ID = 0;
+
+namespace llvm {
+void initializeAArch64ConditionOptimizerPass(PassRegistry &);
+}
+
+INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",
+                      "AArch64 CondOpt Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",
+                    "AArch64 CondOpt Pass", false, false)
+
+FunctionPass *llvm::createAArch64ConditionOptimizerPass() {
+  return new AArch64ConditionOptimizer();
+}
+
+void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+// Finds the compare instruction that corresponds to the supported kinds of
+// branching. Returns the instruction, or nullptr if no suitable compare is
+// found or an unsupported instruction is encountered.
+MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
+    MachineBasicBlock *MBB) {
+  MachineBasicBlock::iterator I = MBB->getFirstTerminator();
+  if (I == MBB->end()) {
+    return nullptr;
+  }
+
+  if (I->getOpcode() != AArch64::Bcc) {
+    return nullptr;
+  }
+
+  // Now find the instruction controlling the terminator.
+  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
+    --I;
+    assert(!I->isTerminator() && "Spurious terminator");
+    switch (I->getOpcode()) {
+    // cmp is an alias for subs with a dead destination register.
+    case AArch64::SUBSWri:
+    case AArch64::SUBSXri:
+    // cmn is an alias for adds with a dead destination register.
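+    // For example, "cmn w8, #5" is encoded as "adds wzr, w8, #5": it compares
+    // w8 with -5, while the immediate operand of the instruction stays +5.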
+    case AArch64::ADDSWri:
+    case AArch64::ADDSXri:
+      return I;
+
+    case AArch64::SUBSWrr:
+    case AArch64::SUBSXrr:
+    case AArch64::ADDSWrr:
+    case AArch64::ADDSXrr:
+    case AArch64::FCMPSrr:
+    case AArch64::FCMPDrr:
+    case AArch64::FCMPESrr:
+    case AArch64::FCMPEDrr:
+      // Skip comparison instructions without immediate operands.
+      return nullptr;
+    }
+  }
+  DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
+  return nullptr;
+}
+
+// Changes the opcode adds <-> subs, keeping the register operand width.
+static int getComplementOpc(int Opc) {
+  switch (Opc) {
+  case AArch64::ADDSWri: return AArch64::SUBSWri;
+  case AArch64::ADDSXri: return AArch64::SUBSXri;
+  case AArch64::SUBSWri: return AArch64::ADDSWri;
+  case AArch64::SUBSXri: return AArch64::ADDSXri;
+  default:
+    llvm_unreachable("Unexpected opcode");
+  }
+}
+
+// Changes the form of a comparison: inclusive <-> exclusive.
+static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
+  switch (Cmp) {
+  case AArch64CC::GT: return AArch64CC::GE;
+  case AArch64CC::GE: return AArch64CC::GT;
+  case AArch64CC::LT: return AArch64CC::LE;
+  case AArch64CC::LE: return AArch64CC::LT;
+  default:
+    llvm_unreachable("Unexpected condition code");
+  }
+}
+
+// Transforms GT -> GE, GE -> GT, LT -> LE, LE -> LT by updating the comparison
+// operator and the condition code.
+AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
+    MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
+  int Opc = CmpMI->getOpcode();
+
+  // CMN (compare with negative immediate) is an alias for ADDS (as
+  // "operand - negative" == "operand + positive").
+  bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);
+
+  int Correction = (Cmp == AArch64CC::GT) ? 1 : -1;
+  // Negate the correction value for a comparison with a negative immediate
+  // (CMN).
+  if (Negative) {
+    Correction = -Correction;
+  }
+
+  const int OldImm = (int)CmpMI->getOperand(2).getImm();
+  const int NewImm = std::abs(OldImm + Correction);
+
+  // Handle the +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
+  // adjusting the compare instruction opcode.
+  if (OldImm == 0 && ((Negative && Correction == 1) ||
+                      (!Negative && Correction == -1))) {
+    Opc = getComplementOpc(Opc);
+  }
+
+  return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp));
+}
+
+// Applies the changes to the comparison instruction suggested by adjustCmp().
+void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
+                                          const CmpInfo &Info) {
+  int Imm;
+  int Opc;
+  AArch64CC::CondCode Cmp;
+  std::tie(Imm, Opc, Cmp) = Info;
+
+  MachineBasicBlock *const MBB = CmpMI->getParent();
+
+  // Change the immediate in the comparison instruction (ADDS or SUBS).
+  BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))
+      .addOperand(CmpMI->getOperand(0))
+      .addOperand(CmpMI->getOperand(1))
+      .addImm(Imm)
+      .addOperand(CmpMI->getOperand(3));
+  CmpMI->eraseFromParent();
+
+  // The fact that this comparison was picked ensures that it's related to the
+  // first terminator instruction.
+  MachineInstr *BrMI = MBB->getFirstTerminator();
+
+  // Change the condition in the branch instruction.
+  BuildMI(*MBB, BrMI, BrMI->getDebugLoc(), TII->get(AArch64::Bcc))
+      .addImm(Cmp)
+      .addOperand(BrMI->getOperand(1));
+  BrMI->eraseFromParent();
+
+  MBB->updateTerminator();
+
+  ++NumConditionsAdjusted;
+}
+
+// Parses a condition code returned by AnalyzeBranch and computes the CondCode
+// corresponding to TBB. Returns true if parsing was successful, otherwise
+// false is returned.
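+// (For a plain b.cc branch, AnalyzeBranch produces a single operand holding
+// the condition code; compare-and-branch forms such as CBZ/CBNZ/TBZ/TBNZ are
+// marked with a leading -1 immediate instead and are rejected here.)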
+static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
+  // A normal br.cond simply has the condition code.
+  if (Cond[0].getImm() != -1) {
+    assert(Cond.size() == 1 && "Unknown Cond array format");
+    CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
+    return true;
+  }
+  return false;
+}
+
+// Adjusts one cmp instruction towards another one if the result of the
+// adjustment will allow CSE. Returns true if the compare instruction was
+// changed, otherwise false is returned.
+bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
+                                         AArch64CC::CondCode Cmp,
+                                         MachineInstr *To, int ToImm) {
+  CmpInfo Info = adjustCmp(CmpMI, Cmp);
+  if (std::get<0>(Info) == ToImm && std::get<1>(Info) == To->getOpcode()) {
+    modifyCmp(CmpMI, Info);
+    return true;
+  }
+  return false;
+}
+
+bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** AArch64 Condition Optimizer **********\n"
+               << "********** Function: " << MF.getName() << '\n');
+  TII = MF.getTarget().getSubtargetImpl()->getInstrInfo();
+  DomTree = &getAnalysis<MachineDominatorTree>();
+
+  bool Changed = false;
+
+  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
+  // cmp-conversions from the same head block.
+  // Note that the df_iterator supports modification of the children of the
+  // DomTree node currently being visited; it doesn't look at child_begin() /
+  // child_end() until after a node has been visited.
+  for (MachineDomTreeNode *I : depth_first(DomTree)) {
+    MachineBasicBlock *HBB = I->getBlock();
+
+    SmallVector<MachineOperand, 4> HeadCond;
+    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+    if (TII->AnalyzeBranch(*HBB, TBB, FBB, HeadCond)) {
+      continue;
+    }
+
+    // The equality check skips single-block loops.
+    if (!TBB || TBB == HBB) {
+      continue;
+    }
+
+    SmallVector<MachineOperand, 4> TrueCond;
+    MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
+    if (TII->AnalyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
+      continue;
+    }
+
+    MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
+    if (!HeadCmpMI) {
+      continue;
+    }
+
+    MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
+    if (!TrueCmpMI) {
+      continue;
+    }
+
+    AArch64CC::CondCode HeadCmp;
+    if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
+      continue;
+    }
+
+    AArch64CC::CondCode TrueCmp;
+    if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
+      continue;
+    }
+
+    const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
+    const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
+
+    DEBUG(dbgs() << "Head branch:\n");
+    DEBUG(dbgs() << "\tcondition: "
+                 << AArch64CC::getCondCodeName(HeadCmp) << '\n');
+    DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
+
+    DEBUG(dbgs() << "True branch:\n");
+    DEBUG(dbgs() << "\tcondition: "
+                 << AArch64CC::getCondCodeName(TrueCmp) << '\n');
+    DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
+
+    if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
+         (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
+        std::abs(TrueImm - HeadImm) == 2) {
+      // This branch transforms machine instructions that correspond to
+      //
+      // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
+      // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
+      //
+      // into
+      //
+      // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
+      // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
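+      //
+      // For example, "(a > 4 && ...) || (a < 6 && ...)" becomes
+      // "(a >= 5 && ...) || (a <= 5 && ...)" (the case from the file header),
+      // and CSE can then remove the second "cmp w8, #5".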
+
+      CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
+      CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
+      if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
+          std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
+        modifyCmp(HeadCmpMI, HeadCmpInfo);
+        modifyCmp(TrueCmpMI, TrueCmpInfo);
+        Changed = true;
+      }
+    } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
+                (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
+               std::abs(TrueImm - HeadImm) == 1) {
+      // This branch transforms machine instructions that correspond to
+      //
+      // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
+      // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
+      //
+      // into
+      //
+      // 1) (a > {NewImm} && ...) || (a >= {NewImm} && ...)
+      // 2) (a < {NewImm} && ...) || (a <= {NewImm} && ...)
+      //
+      // Only the compare whose adjustment reaches the other immediate is
+      // changed, so that both end up comparing against the same value.
+
+      // The GT -> GE transformation increases the immediate value, so pick the
+      // smaller one; LT -> LE decreases the immediate value, so invert the
+      // choice.
+      bool adjustHeadCond = (HeadImm < TrueImm);
+      if (HeadCmp == AArch64CC::LT) {
+        adjustHeadCond = !adjustHeadCond;
+      }
+
+      if (adjustHeadCond) {
+        Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
+      } else {
+        Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
+      }
+    }
+    // Other transformation cases almost never occur, because codegen emits
+    // < or > comparisons instead of <= and >=.
+  }
+
+  return Changed;
+}
Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -68,6 +68,11 @@
                cl::desc("Run early if-conversion"),
                cl::init(true));
 
+static cl::opt<bool>
+EnableCondOpt("aarch64-condopt",
+              cl::desc("Enable the condition optimizer pass"),
+              cl::init(true), cl::Hidden);
+
 extern "C" void LLVMInitializeAArch64Target() {
   // Register the target.
@@ -182,6 +187,8 @@
 }
 
 bool AArch64PassConfig::addILPOpts() {
+  if (EnableCondOpt)
+    addPass(createAArch64ConditionOptimizerPass());
   if (EnableCCMP)
     addPass(createAArch64ConditionalCompares());
   if (EnableMCR)
Index: lib/Target/AArch64/CMakeLists.txt
===================================================================
--- lib/Target/AArch64/CMakeLists.txt
+++ lib/Target/AArch64/CMakeLists.txt
@@ -27,6 +27,7 @@
   AArch64ExpandPseudoInsts.cpp
   AArch64FastISel.cpp
   AArch64FrameLowering.cpp
+  AArch64ConditionOptimizer.cpp
   AArch64ISelDAGToDAG.cpp
   AArch64ISelLowering.cpp
   AArch64InstrInfo.cpp
Index: test/CodeGen/AArch64/combine-comparisons-by-cse.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -0,0 +1,349 @@
+; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; marked as external to prevent possible optimizations
+@a = external global i32
+@b = external global i32
+@c = external global i32
+@d = external global i32
+
+; (a > 10 && b == c) || (a >= 10 && b == d)
+define i32 @combine_gt_ge_10() #0 {
+; CHECK-LABEL: combine_gt_ge_10
+; CHECK: cmp
+; CHECK: b.le
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.lt
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp sgt i32 %0, 10
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %land.lhs.true3
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp sgt i32 %0, 9
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false, %land.lhs.true
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a > 5 && b == c) || (a < 5 && b == d)
+define i32 @combine_gt_lt_5() #0 {
+; CHECK-LABEL: combine_gt_lt_5
+; CHECK: cmp
+; CHECK: b.le
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.ge
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp sgt i32 %0, 5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp slt i32 %0, 5
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a < 5 && b == c) || (a <= 5 && b == d)
+define i32 @combine_lt_ge_5() #0 {
+; CHECK-LABEL: combine_lt_ge_5
+; CHECK: cmp
+; CHECK: b.ge
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.gt
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp slt i32 %0, 5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %land.lhs.true3
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp slt i32 %0, 6
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false, %land.lhs.true
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a < 5 && b == c) || (a > 5 && b == d)
+define i32 @combine_lt_gt_5() #0 {
+; CHECK-LABEL: combine_lt_gt_5
+; CHECK: cmp
+; CHECK: b.ge
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.le
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp slt i32 %0, 5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp sgt i32 %0, 5
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a > -5 && b == c) || (a < -5 && b == d)
+define i32 @combine_gt_lt_n5() #0 {
+; CHECK-LABEL: combine_gt_lt_n5
+; CHECK: cmn
+; CHECK: b.le
+; CHECK: ret
+; CHECK-NOT: cmn
+; CHECK: b.ge
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp sgt i32 %0, -5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp slt i32 %0, -5
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a < -5 && b == c) || (a > -5 && b == d)
+define i32 @combine_lt_gt_n5() #0 {
+; CHECK-LABEL: combine_lt_gt_n5
+; CHECK: cmn
+; CHECK: b.ge
+; CHECK: ret
+; CHECK-NOT: cmn
+; CHECK: b.le
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp slt i32 %0, -5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp sgt i32 %0, -5
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+%struct.Struct = type { i64, i64 }
+
+@glob = internal unnamed_addr global %struct.Struct* null, align 8
+
+declare %struct.Struct* @Update(%struct.Struct*) #1
+
+; no checks for this case; it should just be processed without errors
+define void @combine_non_adjacent_cmp_br(%struct.Struct* nocapture readonly %hdCall) #0 {
+entry:
+  %size = getelementptr inbounds %struct.Struct* %hdCall, i64 0, i32 0
+  %0 = load i64* %size, align 8
+  br label %land.rhs
+
+land.rhs:
+  %rp.06 = phi i64 [ %0, %entry ], [ %sub, %while.body ]
+  %1 = load i64* inttoptr (i64 24 to i64*), align 8
+  %cmp2 = icmp sgt i64 %1, 0
+  br i1 %cmp2, label %while.body, label %while.end
+
+while.body:
+  %2 = load %struct.Struct** @glob, align 8
+  %call = tail call %struct.Struct* @Update(%struct.Struct* %2) #2
+  %sub = add nsw i64 %rp.06, -2
+  %cmp = icmp slt i64 %0, %rp.06
+  br i1 %cmp, label %land.rhs, label %while.end
+
+while.end:
+  ret void
+}
+
+; undefined external to prevent possible optimizations
+declare void @do_something() #1
+
+define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
+; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ
+; CHECK: cmn
+; CHECK: b.gt
+; CHECK: cmp
+; CHECK: b.gt
+entry:
+  %0 = load i32* @a, align 4
+  %cmp4 = icmp slt i32 %0, -1
+  br i1 %cmp4, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %while.body.preheader
+  %i.05 = phi i32 [ %inc, %while.body ], [ %0, %while.body.preheader ]
+  tail call void @do_something() #2
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %i.05, 0
+  br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge:                   ; preds = %while.body
+  %.pre = load i32* @a, align 4
+  br label %while.end
+
+while.end:                                        ; preds = %while.cond.while.end_crit_edge, %entry
+  %1 = phi i32 [ %.pre, %while.cond.while.end_crit_edge ], [ %0, %entry ]
+  %cmp1 = icmp slt i32 %1, 2
+  br i1 %cmp1, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %while.end
+  %2 = load i32* @b, align 4
+  %3 = load i32* @d, align 4
+  %cmp2 = icmp eq i32 %2, %3
+  br i1 %cmp2, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true, %while.end
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 123, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
+; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other
+; CHECK: cmp
+; CHECK: b.gt
+; CHECK: cmn
+; CHECK: b.lt
+entry:
+  %0 = load i32* @a, align 4
+  %cmp4 = icmp slt i32 %0, 1
+  br i1 %cmp4, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %while.body.preheader
+  %i.05 = phi i32 [ %inc, %while.body ], [ %0, %while.body.preheader ]
+  tail call void @do_something() #2
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %i.05, 0
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %1 = load i32* @c, align 4
+  %cmp1 = icmp sgt i32 %1, -3
+  br i1 %cmp1, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %while.end
+  %2 = load i32* @b, align 4
+  %3 = load i32* @d, align 4
+  %cmp2 = icmp eq i32 %2, %3
+  br i1 %cmp2, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true, %while.end
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 123, %land.lhs.true ]
+  ret i32 %retval.0
+}