Index: lib/Target/AArch64/AArch64.h =================================================================== --- lib/Target/AArch64/AArch64.h +++ lib/Target/AArch64/AArch64.h @@ -31,6 +31,7 @@ FunctionPass *createAArch64DeadRegisterDefinitions(); FunctionPass *createAArch64RedundantCopyEliminationPass(); +FunctionPass *createAArch64CondBrTuning(); FunctionPass *createAArch64ConditionalCompares(); FunctionPass *createAArch64AdvSIMDScalar(); FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM, @@ -55,6 +56,7 @@ void initializeAArch64A57FPLoadBalancingPass(PassRegistry&); void initializeAArch64AdvSIMDScalarPass(PassRegistry&); void initializeAArch64CollectLOHPass(PassRegistry&); +void initializeAArch64CondBrTuningPass(PassRegistry &); void initializeAArch64ConditionalComparesPass(PassRegistry&); void initializeAArch64ConditionOptimizerPass(PassRegistry&); void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&); Index: lib/Target/AArch64/AArch64CondBrTuning.cpp =================================================================== --- /dev/null +++ lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -0,0 +1,336 @@ +//===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions +/// into a conditional branch (B.cond), when the NZCV flags can be set for +/// "free". This is preferred on targets that have more flexibility when +/// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming +/// all other variables are equal). This can also reduce register pressure. +/// +/// A few examples: +/// +/// 1) add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS. +/// cbz w8, .LBB_2 -> b.eq .LBB0_2 +/// +/// 2) add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses. +/// cbz w8, .LBB1_2 -> b.eq .LBB1_2 +/// +/// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses. 
+/// tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2 +/// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64Subtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-cond-br-tuning" +#define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning" + +namespace { +class AArch64CondBrTuning : public MachineFunctionPass { + const AArch64InstrInfo *TII; + const TargetRegisterInfo *TRI; + + MachineRegisterInfo *MRI; + +public: + static char ID; + AArch64CondBrTuning() : MachineFunctionPass(ID) { + initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; } + +private: + MachineInstr *getOperandDef(const MachineOperand &MO); + MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting); + MachineInstr *convertToCondBr(MachineInstr &MI); + bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI); +}; +} // end anonymous namespace + +char AArch64CondBrTuning::ID = 0; + +INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning", + AARCH64_CONDBR_TUNING_NAME, false, false) + +void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) { + if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) + return nullptr; + return MRI->getUniqueVRegDef(MO.getReg()); +} + +MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, + bool IsFlagSetting) { + // If this is already the flag-setting version of the instruction (e.g., SUBS), + // just make sure the implicit-def of NZCV isn't marked dead. + if (IsFlagSetting) { + for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands(); + I != E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV) + MO.setIsDead(false); + } + return &MI; + } + bool Is64Bit; + unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit); + unsigned NewDestReg = MI.getOperand(0).getReg(); + if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg())) + NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+ + MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + TII->get(NewOpc), NewDestReg); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + MIB.add(MI.getOperand(I)); + + return MIB; +} + +MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { + AArch64CC::CondCode CC; + MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode!"); + + case AArch64::CBZW: + case AArch64::CBZX: + CC = AArch64CC::EQ; + break; + case AArch64::CBNZW: + case AArch64::CBNZX: + CC = AArch64CC::NE; + break; + case AArch64::TBZW: + case AArch64::TBZX: + CC = AArch64CC::GE; + break; + case AArch64::TBNZW: + case AArch64::TBNZX: + CC = AArch64CC::LT; + break; + } + return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TargetMBB); +} + +bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, + MachineInstr &DefMI) { + // We don't want NZCV bits live across blocks. + if (MI.getParent() != DefMI.getParent()) + return false; + + bool IsFlagSetting = true; + unsigned MIOpc = MI.getOpcode(); + MachineInstr *NewCmp = nullptr, *NewBr = nullptr; + switch (DefMI.getOpcode()) { + default: + return false; + case AArch64::ADDWri: + case AArch64::ADDWrr: + case AArch64::ADDWrs: + case AArch64::ADDWrx: + case AArch64::ANDWri: + case AArch64::ANDWrr: + case AArch64::ANDWrs: + case AArch64::BICWrr: + case AArch64::BICWrs: + case AArch64::SUBWri: + case AArch64::SUBWrr: + case AArch64::SUBWrs: + case AArch64::SUBWrx: + IsFlagSetting = false; + LLVM_FALLTHROUGH; + case AArch64::ADDSWri: + case AArch64::ADDSWrr: + case AArch64::ADDSWrs: + case AArch64::ADDSWrx: + case AArch64::ANDSWri: + case AArch64::ANDSWrr: + case AArch64::ANDSWrs: + case AArch64::BICSWrr: + case AArch64::BICSWrs: + case AArch64::SUBSWri: + case AArch64::SUBSWrr: + case AArch64::SUBSWrs: + case AArch64::SUBSWrx: + switch (MIOpc) { + default: + llvm_unreachable("Unexpected opcode!"); + + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::TBZW: + case AArch64::TBNZW: + // Check to see if the TBZ/TBNZ is checking the sign bit. + if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) && + MI.getOperand(1).getImm() != 31) + return false; + + // There must not be any instruction between DefMI and MI that clobbers or + // reads NZCV.
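+ // A clobber would overwrite the flags before the new B.cond reads them, and + // an intervening reader would start observing the flags DefMI now sets.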
+ MachineBasicBlock::iterator I(DefMI), E(MI); + for (I = std::next(I); I != E; ++I) { + if (I->modifiesRegister(AArch64::NZCV, TRI) || + I->readsRegister(AArch64::NZCV, TRI)) + return false; + } + DEBUG(dbgs() << " Replacing instructions:\n "); + DEBUG(DefMI.print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(MI.print(dbgs())); + + NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); + NewBr = convertToCondBr(MI); + break; + } + break; + + case AArch64::ADDXri: + case AArch64::ADDXrr: + case AArch64::ADDXrs: + case AArch64::ADDXrx: + case AArch64::ANDXri: + case AArch64::ANDXrr: + case AArch64::ANDXrs: + case AArch64::BICXrr: + case AArch64::BICXrs: + case AArch64::SUBXri: + case AArch64::SUBXrr: + case AArch64::SUBXrs: + case AArch64::SUBXrx: + IsFlagSetting = false; + LLVM_FALLTHROUGH; + case AArch64::ADDSXri: + case AArch64::ADDSXrr: + case AArch64::ADDSXrs: + case AArch64::ADDSXrx: + case AArch64::ANDSXri: + case AArch64::ANDSXrr: + case AArch64::ANDSXrs: + case AArch64::BICSXrr: + case AArch64::BICSXrs: + case AArch64::SUBSXri: + case AArch64::SUBSXrr: + case AArch64::SUBSXrs: + case AArch64::SUBSXrx: + switch (MIOpc) { + default: + llvm_unreachable("Unexpected opcode!"); + + case AArch64::CBZX: + case AArch64::CBNZX: + case AArch64::TBZX: + case AArch64::TBNZX: { + // Check to see if the TBZ/TBNZ is checking the sign bit. + if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) && + MI.getOperand(1).getImm() != 63) + return false; + // There must not be any instruction between DefMI and MI that clobbers or + // reads NZCV. + MachineBasicBlock::iterator I(DefMI), E(MI); + for (I = std::next(I); I != E; ++I) { + if (I->modifiesRegister(AArch64::NZCV, TRI) || + I->readsRegister(AArch64::NZCV, TRI)) + return false; + } + DEBUG(dbgs() << " Replacing instructions:\n "); + DEBUG(DefMI.print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(MI.print(dbgs())); + + NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); + NewBr = convertToCondBr(MI); + break; + } + } + break; + } + assert(NewCmp && NewBr && "Expected new instructions."); + + DEBUG(dbgs() << " with instructions:\n "); + DEBUG(NewCmp->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(NewBr->print(dbgs())); + + // If this was a flag-setting version of the instruction, we use the original + // instruction by just clearing the dead flag on the implicit-def of NZCV. + // Therefore, we should not erase this instruction.
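+ // When a new flag-setting instruction was built, it was inserted immediately + // before DefMI, and the new B.cond immediately before MI, so erasing the old + // pair leaves the new sequence in place.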
+ if (!IsFlagSetting) + DefMI.eraseFromParent(); + MI.eraseFromParent(); + return true; +} + +bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + + DEBUG(dbgs() << "********** AArch64 Conditional Branch Tuning **********\n" + << "********** Function: " << MF.getName() << '\n'); + + TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + bool LocalChange = false; + for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(), + E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + switch (MI.getOpcode()) { + default: + break; + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: + case AArch64::TBZW: + case AArch64::TBZX: + case AArch64::TBNZW: + case AArch64::TBNZX: { + MachineInstr *DefMI = getOperandDef(MI.getOperand(0)); + LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI)); + break; + } + } + // If the optimization was successful, we can't optimize any other + // branches because doing so would clobber the NZCV flags. + if (LocalChange) { + Changed = true; + break; + } + } + } + return Changed; +} + +FunctionPass *llvm::createAArch64CondBrTuning() { + return new AArch64CondBrTuning(); +} Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -119,6 +119,44 @@ } } + /// \brief Return the opcode that sets flags when possible. The caller is + /// responsible for ensuring the opcode has a flag-setting equivalent. + static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit) { + switch (Opc) { + default: + llvm_unreachable("Opcode has no flag setting equivalent!"); + // 32-bit cases: + case AArch64::ADDWri: Is64Bit = false; return AArch64::ADDSWri; + case AArch64::ADDWrr: Is64Bit = false; return AArch64::ADDSWrr; + case AArch64::ADDWrs: Is64Bit = false; return AArch64::ADDSWrs; + case AArch64::ADDWrx: Is64Bit = false; return AArch64::ADDSWrx; + case AArch64::ANDWri: Is64Bit = false; return AArch64::ANDSWri; + case AArch64::ANDWrr: Is64Bit = false; return AArch64::ANDSWrr; + case AArch64::ANDWrs: Is64Bit = false; return AArch64::ANDSWrs; + case AArch64::BICWrr: Is64Bit = false; return AArch64::BICSWrr; + case AArch64::BICWrs: Is64Bit = false; return AArch64::BICSWrs; + case AArch64::SUBWri: Is64Bit = false; return AArch64::SUBSWri; + case AArch64::SUBWrr: Is64Bit = false; return AArch64::SUBSWrr; + case AArch64::SUBWrs: Is64Bit = false; return AArch64::SUBSWrs; + case AArch64::SUBWrx: Is64Bit = false; return AArch64::SUBSWrx; + // 64-bit cases: + case AArch64::ADDXri: Is64Bit = true; return AArch64::ADDSXri; + case AArch64::ADDXrr: Is64Bit = true; return AArch64::ADDSXrr; + case AArch64::ADDXrs: Is64Bit = true; return AArch64::ADDSXrs; + case AArch64::ADDXrx: Is64Bit = true; return AArch64::ADDSXrx; + case AArch64::ANDXri: Is64Bit = true; return AArch64::ANDSXri; + case AArch64::ANDXrr: Is64Bit = true; return AArch64::ANDSXrr; + case AArch64::ANDXrs: Is64Bit = true; return AArch64::ANDSXrs; + case AArch64::BICXrr: Is64Bit = true; return AArch64::BICSXrr; + case AArch64::BICXrs: Is64Bit = true; return AArch64::BICSXrs; + case AArch64::SUBXri: Is64Bit = true; return AArch64::SUBSXri; + case AArch64::SUBXrr: Is64Bit = true; return AArch64::SUBSXrr; + case AArch64::SUBXrs: Is64Bit = true; return AArch64::SUBSXrs;
+ case AArch64::SUBXrx: Is64Bit = true; return AArch64::SUBSXrx; + } + } + + /// Return true if this is a load/store that can be potentially paired/merged. bool isCandidateToMergeOrPair(MachineInstr &MI) const; Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1036,7 +1036,7 @@ /// \brief Return the opcode that does not set flags when possible - otherwise /// return the original opcode. The caller is responsible to do the actual /// substitution and legality checking. -static unsigned convertFlagSettingOpcode(const MachineInstr &MI) { +static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { // Don't convert all compare instructions, because for some the zero register // encoding becomes the sp register. bool MIDefinesZeroReg = false; @@ -1145,7 +1145,7 @@ return true; } unsigned Opc = CmpInstr.getOpcode(); - unsigned NewOpc = convertFlagSettingOpcode(CmpInstr); + unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr); if (NewOpc == Opc) return false; const MCInstrDesc &MCID = get(NewOpc); @@ -3318,7 +3318,7 @@ // When NZCV is live bail out. if (Cmp_NZCV == -1) return false; - unsigned NewOpc = convertFlagSettingOpcode(Root); + unsigned NewOpc = convertToNonFlagSettingOpc(Root); // When opcode can't change bail out. // CHECKME: do we miss any cases for opcode conversion? if (NewOpc == Opc) Index: lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetMachine.cpp +++ lib/Target/AArch64/AArch64TargetMachine.cpp @@ -47,6 +47,11 @@ cl::desc("Enable the CCMP formation pass"), cl::init(true), cl::Hidden); +static cl::opt<bool> + EnableCondBrTuning("aarch64-enable-cond-br-tune", + cl::desc("Enable the conditional branch tuning pass"), + cl::init(true), cl::Hidden); + static cl::opt<bool> EnableMCR("aarch64-enable-mcr", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); @@ -429,6 +434,8 @@ addPass(createAArch64ConditionalCompares()); if (EnableMCR) addPass(&MachineCombinerID); + if (EnableCondBrTuning) + addPass(createAArch64CondBrTuning()); if (EnableEarlyIfConversion) addPass(&EarlyIfConverterID); if (EnableStPairSuppress) Index: lib/Target/AArch64/CMakeLists.txt =================================================================== --- lib/Target/AArch64/CMakeLists.txt +++ lib/Target/AArch64/CMakeLists.txt @@ -43,6 +43,7 @@ AArch64AsmPrinter.cpp AArch64CleanupLocalDynamicTLSPass.cpp AArch64CollectLOH.cpp + AArch64CondBrTuning.cpp AArch64ConditionalCompares.cpp AArch64DeadRegisterDefinitionsPass.cpp AArch64ExpandPseudoInsts.cpp Index: test/CodeGen/AArch64/arm64-early-ifcvt.ll =================================================================== --- test/CodeGen/AArch64/arm64-early-ifcvt.ll +++ test/CodeGen/AArch64/arm64-early-ifcvt.ll @@ -27,7 +27,7 @@ do.cond: %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ] %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ] -; CHECK: cbnz +; CHECK: b.ne %dec = add i32 %n.addr.0, -1 %tobool = icmp eq i32 %dec, 0 br i1 %tobool, label %do.end, label %do.body Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -78,9 +78,9 @@ ; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body ; CHECK: bl _something -; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: subs [[IV]], [[IV]], #1 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: cbnz [[IV]], [[LOOP]] +; CHECK-NEXT: b.ne [[LOOP]] ; ; Next BB. ; Copy SUM into the returned register + << 3. @@ -144,9 +144,9 @@ ; Next BB. ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; CHECK: bl _something -; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: subs [[IV]], [[IV]], #1 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; CHECK-NEXT: b.ne [[LOOP_LABEL]] ; Next BB. ; CHECK: ; %for.end ; CHECK: mov w0, [[SUM]] @@ -188,9 +188,9 @@ ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; CHECK: bl _something -; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: subs [[IV]], [[IV]], #1 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; CHECK-NEXT: b.ne [[LOOP_LABEL]] ; Next BB. ; CHECK: bl _somethingElse ; CHECK-NEXT: lsl w0, [[SUM]], #3 @@ -259,9 +259,9 @@ ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; CHECK: bl _something -; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: subs [[IV]], [[IV]], #1 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; CHECK-NEXT: b.ne [[LOOP_LABEL]] ; Next BB. ; CHECK: lsl w0, [[SUM]], #3 ; @@ -343,9 +343,9 @@ ; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8 ; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8] ; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]] -; CHECK-NEXT: sub w1, w1, #1 +; CHECK-NEXT: subs w1, w1, #1 ; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]] -; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]] +; CHECK-NEXT: b.ne [[LOOP_LABEL]] ; CHECK-NEXT: [[IFEND_LABEL]]: ; Epilogue code. ; CHECK: add sp, sp, #16 @@ -409,9 +409,9 @@ ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; Inline asm statement. -; CHECK: sub [[IV]], [[IV]], #1 +; CHECK: subs [[IV]], [[IV]], #1 ; CHECK: add x19, x19, #1 -; CHECK: cbnz [[IV]], [[LOOP_LABEL]] +; CHECK: b.ne [[LOOP_LABEL]] ; Next BB. ; CHECK: mov w0, wzr ; Epilogue code. Index: test/CodeGen/AArch64/cond-br-tuning.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/cond-br-tuning.ll @@ -0,0 +1,169 @@ +; RUN: llc < %s -O3 -mtriple=aarch64-eabi -verify-machineinstrs | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linaro-linux-gnueabi" + +; CMN is an alias of ADDS. 
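+; The add feeding the cbz has no other use, so it is rewritten to the +; flag-setting form with a wzr destination (which prints as cmn), and the +; cbz becomes b.eq.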
+; CHECK-LABEL: test_add_cbz: +; CHECK: cmn w0, w1 +; CHECK: b.eq +; CHECK: ret +define void @test_add_cbz(i32 %a, i32 %b, i32* %ptr) { + %c = add nsw i32 %a, %b + %d = icmp ne i32 %c, 0 + br i1 %d, label %L1, label %L2 +L1: + store i32 0, i32* %ptr, align 4 + ret void +L2: + store i32 1, i32* %ptr, align 4 + ret void +} + +; CHECK-LABEL: test_add_cbz_multiple_use: +; CHECK: adds +; CHECK: b.eq +; CHECK: ret +define void @test_add_cbz_multiple_use(i32 %a, i32 %b, i32* %ptr) { + %c = add nsw i32 %a, %b + %d = icmp ne i32 %c, 0 + br i1 %d, label %L1, label %L2 +L1: + store i32 0, i32* %ptr, align 4 + ret void +L2: + store i32 %c, i32* %ptr, align 4 + ret void +} + +; CHECK-LABEL: test_add_cbz_64: +; CHECK: cmn x0, x1 +; CHECK: b.eq +define void @test_add_cbz_64(i64 %a, i64 %b, i64* %ptr) { + %c = add nsw i64 %a, %b + %d = icmp ne i64 %c, 0 + br i1 %d, label %L1, label %L2 +L1: + store i64 0, i64* %ptr, align 4 + ret void +L2: + store i64 1, i64* %ptr, align 4 + ret void +} + +; CHECK-LABEL: test_and_cbz: +; CHECK: tst w0, #0x6 +; CHECK: b.eq +define void @test_and_cbz(i32 %a, i32* %ptr) { + %c = and i32 %a, 6 + %d = icmp ne i32 %c, 0 + br i1 %d, label %L1, label %L2 +L1: + store i32 0, i32* %ptr, align 4 + ret void +L2: + store i32 1, i32* %ptr, align 4 + ret void +} + +; CHECK-LABEL: test_bic_cbnz: +; CHECK: bics wzr, w1, w0 +; CHECK: b.ne +define void @test_bic_cbnz(i32 %a, i32 %b, i32* %ptr) { + %c = and i32 %a, %b + %d = icmp eq i32 %c, %b + br i1 %d, label %L1, label %L2 +L1: + store i32 0, i32* %ptr, align 4 + ret void +L2: + store i32 1, i32* %ptr, align 4 + ret void +} + +; CHECK-LABEL: test_add_tbz: +; CHECK: adds +; CHECK: b.ge +; CHECK: ret +define void @test_add_tbz(i32 %a, i32 %b, i32* %ptr) { +entry: + %add = add nsw i32 %a, %b + %cmp36 = icmp sge i32 %add, 0 + br i1 %cmp36, label %L2, label %L1 +L1: + store i32 %add, i32* %ptr, align 8 + br label %L2 +L2: + ret void +} + +; CHECK-LABEL: test_subs_tbz: +; CHECK: subs +; CHECK: b.ge +; CHECK: ret +define void @test_subs_tbz(i32 %a, i32 %b, i32* %ptr) { +entry: + %sub = sub nsw i32 %a, %b + %cmp36 = icmp sge i32 %sub, 0 + br i1 %cmp36, label %L2, label %L1 +L1: + store i32 %sub, i32* %ptr, align 8 + br label %L2 +L2: + ret void +} + +; CHECK-LABEL: test_add_tbnz +; CHECK: adds +; CHECK: b.lt +; CHECK: ret +define void @test_add_tbnz(i32 %a, i32 %b, i32* %ptr) { +entry: + %add = add nsw i32 %a, %b + %cmp36 = icmp slt i32 %add, 0 + br i1 %cmp36, label %L2, label %L1 +L1: + store i32 %add, i32* %ptr, align 8 + br label %L2 +L2: + ret void +} + +; CHECK-LABEL: test_subs_tbnz +; CHECK: subs +; CHECK: b.lt +; CHECK: ret +define void @test_subs_tbnz(i32 %a, i32 %b, i32* %ptr) { +entry: + %sub = sub nsw i32 %a, %b + %cmp36 = icmp slt i32 %sub, 0 + br i1 %cmp36, label %L2, label %L1 +L1: + store i32 %sub, i32* %ptr, align 8 + br label %L2 +L2: + ret void +} + +declare void @foo() +declare void @bar(i32) + +; Don't transform since the call will clobber the NZCV bits. 
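+; The bl to bar sits between the and and the cbnz, so the NZCV scan in +; tryToTuneBranch rejects the transformation and the cbnz is kept.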
+; CHECK-LABEL: test_call_clobber: +; CHECK: and w[[DST:[0-9]+]], w1, #0x6 +; CHECK: bl bar +; CHECK: cbnz w[[DST]] +define void @test_call_clobber(i32 %unused, i32 %a) { +entry: + %c = and i32 %a, 6 + call void @bar(i32 %c) + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: + tail call void @foo() + unreachable + +if.end: + ret void +} Index: test/CodeGen/AArch64/misched-fusion.ll =================================================================== --- test/CodeGen/AArch64/misched-fusion.ll +++ test/CodeGen/AArch64/misched-fusion.ll @@ -7,8 +7,8 @@ ; Make sure sub is scheduled in front of cbnz ; CHECK-LABEL: test_sub_cbz: -; CHECK: sub w[[SUBRES:[0-9]+]], w0, #13 -; CHECK-NEXT: cbnz w[[SUBRES]], {{.?LBB[0-9_]+}} +; CHECK: subs w[[SUBRES:[0-9]+]], w0, #13 +; CHECK: b.ne {{.?LBB[0-9_]+}} define void @test_sub_cbz(i32 %a0, i32 %a1) { entry: ; except for the fusion opportunity the sub/add should be equal so the Index: test/CodeGen/AArch64/stack-guard-remat-bitcast.ll =================================================================== --- test/CodeGen/AArch64/stack-guard-remat-bitcast.ll +++ test/CodeGen/AArch64/stack-guard-remat-bitcast.ll @@ -13,8 +13,8 @@ ; CHECK: ldur [[R3:x[0-9]+]], {{\[}}x29, [[SLOT0]]{{\]}} ; CHECK: ldr [[GUARD_ADDR:x[0-9]+]], {{\[}}[[GUARD_PAGE]], ___stack_chk_guard@GOTPAGEOFF{{\]}} ; CHECK: ldr [[GUARD:x[0-9]+]], {{\[}}[[GUARD_ADDR]]{{\]}} -; CHECK: sub [[R4:x[0-9]+]], [[GUARD]], [[R3]] -; CHECK: cbnz [[R4]], LBB +; CHECK: cmp [[GUARD]], [[R3]] +; CHECK: b.ne LBB define i32 @test_stack_guard_remat2() { entry: Index: test/CodeGen/AArch64/tbz-tbnz.ll =================================================================== --- test/CodeGen/AArch64/tbz-tbnz.ll +++ test/CodeGen/AArch64/tbz-tbnz.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O1 -mtriple=aarch64-eabi | FileCheck %s +; RUN: llc < %s -O1 -mtriple=aarch64-eabi -aarch64-enable-cond-br-tune=false | FileCheck %s declare void @t() Index: test/CodeGen/AArch64/thread-pointer.ll =================================================================== --- test/CodeGen/AArch64/thread-pointer.ll +++ test/CodeGen/AArch64/thread-pointer.ll @@ -13,8 +13,8 @@ ; CHECK: .LBB0_2: ; CHECK: ldr w0, [x[[REG2]]] ; CHECK: bl bar -; CHECK: sub w[[REG3:[0-9]+]], w{{[0-9]+}}, #1 -; CHECK: cbnz w[[REG3]], .LBB0_2 +; CHECK: subs w[[REG3:[0-9]+]], w{{[0-9]+}}, #1 +; CHECK: b.ne .LBB0_2 define void @test1(i32 %n) local_unnamed_addr { entry: