Index: lib/Target/AArch64/AArch64.h
===================================================================
--- lib/Target/AArch64/AArch64.h
+++ lib/Target/AArch64/AArch64.h
@@ -31,6 +31,7 @@
 FunctionPass *createAArch64DeadRegisterDefinitions();
 FunctionPass *createAArch64RedundantCopyEliminationPass();
+FunctionPass *createAArch64CondBrTuning();
 FunctionPass *createAArch64ConditionalCompares();
 FunctionPass *createAArch64AdvSIMDScalar();
 FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
@@ -55,6 +56,7 @@
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
 void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
 void initializeAArch64CollectLOHPass(PassRegistry&);
+void initializeAArch64CondBrTuningPass(PassRegistry&);
 void initializeAArch64ConditionalComparesPass(PassRegistry&);
 void initializeAArch64ConditionOptimizerPass(PassRegistry&);
 void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
Index: lib/Target/AArch64/AArch64CondBrTuning.cpp
===================================================================
--- /dev/null
+++ lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -0,0 +1,368 @@
+//===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions
+// into conditional branches (B.cond), when the NZCV flags can be set for
+// "free". This is preferred on targets that have more flexibility when
+// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming
+// all other variables are equal). This can also reduce register pressure.
+//
+// A few examples:
+//
+// 1) add w8, w0, w1  ->  cmn w0, w1       ; CMN is an alias of ADDS.
+//    cbz w8, .LBB0_2 ->  b.eq .LBB0_2
+//
+// 2) add w8, w0, w1  ->  adds w8, w0, w1  ; w8 has multiple uses.
+//    cbz w8, .LBB1_2 ->  b.eq .LBB1_2
+//
+// 3) sub w8, w0, w1  ->  subs w8, w0, w1  ; w8 has multiple uses.
+//    tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-cond-br-tuning"
+#define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning"
+
+namespace {
+class AArch64CondBrTuning : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+
+  MachineRegisterInfo *MRI;
+
+public:
+  static char ID;
+  AArch64CondBrTuning() : MachineFunctionPass(ID) {
+    initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry());
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; }
+
+private:
+  MachineInstr *getOperandDef(const MachineOperand &MO);
+  MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting);
+  MachineInstr *convertToCondBr(MachineInstr &MI);
+  bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI);
+};
+} // end anonymous namespace
+
+char AArch64CondBrTuning::ID = 0;
+
+INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning",
+                AARCH64_CONDBR_TUNING_NAME, false, false)
+
+void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const {
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) {
+  MachineInstr *DefInstr = nullptr;
+  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+    DefInstr = MRI->getUniqueVRegDef(MO.getReg());
+  return DefInstr;
+}
+
+MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
+                                                        bool IsFlagSetting) {
+  // If this is already the flag-setting version of the instruction (e.g.,
+  // SUBS), just make sure the implicit-def of NZCV isn't marked dead.
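+  //
+  // For example (illustrative MIR, not taken from an actual test case):
+  //   %w8 = SUBSWrr %w0, %w1, implicit-def dead %nzcv
+  // Clearing the dead flag is all that is needed here: the B.cond created
+  // below can then consume the NZCV def directly, and no new instruction
+  // has to be built.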
+ if (IsFlagSetting) { + for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands(); + I != E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV) + MO.setIsDead(false); + } + return &MI; + } + + unsigned NewOpc; + bool Is64Bit = false; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode!"); + break; + // 32-bit cases: + case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break; + case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break; + case AArch64::ADDWrs: NewOpc = AArch64::ADDSWrs; break; + case AArch64::ADDWrx: NewOpc = AArch64::ADDSWrx; break; + case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break; + case AArch64::ANDWrr: NewOpc = AArch64::ANDSWrr; break; + case AArch64::ANDWrs: NewOpc = AArch64::ANDSWrs; break; + case AArch64::BICWrr: NewOpc = AArch64::BICSWrr; break; + case AArch64::BICWrs: NewOpc = AArch64::BICSWrs; break; + case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break; + case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break; + case AArch64::SUBWrs: NewOpc = AArch64::SUBSWrs; break; + case AArch64::SUBWrx: NewOpc = AArch64::SUBSWrx; break; + + // 64-bit cases: + case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; Is64Bit = true; break; + case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; Is64Bit = true; break; + case AArch64::ADDXrs: NewOpc = AArch64::ADDSXrs; Is64Bit = true; break; + case AArch64::ADDXrx: NewOpc = AArch64::ADDSXrx; Is64Bit = true; break; + case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; Is64Bit = true; break; + case AArch64::ANDXrr: NewOpc = AArch64::ANDSXrr; Is64Bit = true; break; + case AArch64::ANDXrs: NewOpc = AArch64::ANDSXrs; Is64Bit = true; break; + case AArch64::BICXrr: NewOpc = AArch64::BICSXrr; Is64Bit = true; break; + case AArch64::BICXrs: NewOpc = AArch64::BICSXrs; Is64Bit = true; break; + case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; Is64Bit = true; break; + case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; Is64Bit = true; break; + case AArch64::SUBXrs: NewOpc = AArch64::SUBSXrs; Is64Bit = true; break; + case AArch64::SUBXrx: NewOpc = AArch64::SUBSXrx; Is64Bit = true; break; + } + + unsigned NewDestReg = MI.getOperand(0).getReg(); + if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg())) + NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR; + + MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + TII->get(NewOpc), NewDestReg); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + MIB.add(MI.getOperand(I)); + + return MIB; +} + +MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { + AArch64CC::CondCode CC; + MachineBasicBlock *TargetMBB; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode!"); + + case AArch64::CBZW: + case AArch64::CBZX: + CC = AArch64CC::EQ; + TargetMBB = MI.getOperand(1).getMBB(); + break; + case AArch64::CBNZW: + case AArch64::CBNZX: + CC = AArch64CC::NE; + TargetMBB = MI.getOperand(1).getMBB(); + break; + case AArch64::TBZW: + case AArch64::TBZX: + CC = AArch64CC::GE; + TargetMBB = MI.getOperand(2).getMBB(); + break; + case AArch64::TBNZW: + case AArch64::TBNZX: + CC = AArch64CC::LT; + TargetMBB = MI.getOperand(2).getMBB(); + break; + } + return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TargetMBB); +} + +bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, + MachineInstr &DefMI) { + // We don't want NZCV bits live across blocks. 
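+  // A B.cond reads NZCV, so if DefMI were in a different block the flags
+  // would have to be live-out of that block and live across any control flow
+  // in between. Keeping the transformation block-local avoids having to
+  // reason about NZCV liveness across block boundaries.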
+  if (MI.getParent() != DefMI.getParent())
+    return false;
+
+  // There must not be any instruction between DefMI and MI that clobbers or
+  // reads NZCV.
+  MachineBasicBlock::iterator I(DefMI), E(MI);
+  for (I = std::next(I); I != E; ++I) {
+    if (I->modifiesRegister(AArch64::NZCV, TRI) ||
+        I->readsRegister(AArch64::NZCV, TRI))
+      return false;
+  }
+
+  bool IsFlagSetting = true;
+  unsigned MIOpc = MI.getOpcode();
+  MachineInstr *NewCmp = nullptr, *NewBr = nullptr;
+  switch (DefMI.getOpcode()) {
+  default:
+    return false;
+  case AArch64::ADDWri:
+  case AArch64::ADDWrr:
+  case AArch64::ADDWrs:
+  case AArch64::ADDWrx:
+  case AArch64::ANDWri:
+  case AArch64::ANDWrr:
+  case AArch64::ANDWrs:
+  case AArch64::BICWrr:
+  case AArch64::BICWrs:
+  case AArch64::SUBWri:
+  case AArch64::SUBWrr:
+  case AArch64::SUBWrs:
+  case AArch64::SUBWrx:
+    IsFlagSetting = false;
+    LLVM_FALLTHROUGH;
+  case AArch64::ADDSWri:
+  case AArch64::ADDSWrr:
+  case AArch64::ADDSWrs:
+  case AArch64::ADDSWrx:
+  case AArch64::ANDSWri:
+  case AArch64::ANDSWrr:
+  case AArch64::ANDSWrs:
+  case AArch64::BICSWrr:
+  case AArch64::BICSWrs:
+  case AArch64::SUBSWri:
+  case AArch64::SUBSWrr:
+  case AArch64::SUBSWrs:
+  case AArch64::SUBSWrx:
+    switch (MIOpc) {
+    default:
+      llvm_unreachable("Unexpected opcode!");
+
+    case AArch64::CBZW:
+    case AArch64::CBNZW:
+    case AArch64::TBZW:
+    case AArch64::TBNZW:
+      // Check to see if the TBZ/TBNZ is checking the sign bit.
+      if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) &&
+          MI.getOperand(1).getImm() != 31)
+        return false;
+
+      DEBUG(dbgs() << "  Replacing instructions:\n    ");
+      DEBUG(DefMI.print(dbgs()));
+      DEBUG(dbgs() << "    ");
+      DEBUG(MI.print(dbgs()));
+
+      NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+      NewBr = convertToCondBr(MI);
+      break;
+    }
+    break;
+
+  case AArch64::ADDXri:
+  case AArch64::ADDXrr:
+  case AArch64::ADDXrs:
+  case AArch64::ADDXrx:
+  case AArch64::ANDXri:
+  case AArch64::ANDXrr:
+  case AArch64::ANDXrs:
+  case AArch64::BICXrr:
+  case AArch64::BICXrs:
+  case AArch64::SUBXri:
+  case AArch64::SUBXrr:
+  case AArch64::SUBXrs:
+  case AArch64::SUBXrx:
+    IsFlagSetting = false;
+    LLVM_FALLTHROUGH;
+  case AArch64::ADDSXri:
+  case AArch64::ADDSXrr:
+  case AArch64::ADDSXrs:
+  case AArch64::ADDSXrx:
+  case AArch64::ANDSXri:
+  case AArch64::ANDSXrr:
+  case AArch64::ANDSXrs:
+  case AArch64::BICSXrr:
+  case AArch64::BICSXrs:
+  case AArch64::SUBSXri:
+  case AArch64::SUBSXrr:
+  case AArch64::SUBSXrs:
+  case AArch64::SUBSXrx:
+    switch (MIOpc) {
+    default:
+      llvm_unreachable("Unexpected opcode!");
+
+    case AArch64::CBZX:
+    case AArch64::CBNZX:
+    case AArch64::TBZX:
+    case AArch64::TBNZX: {
+      // Check to see if the TBZ/TBNZ is checking the sign bit.
+      if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) &&
+          MI.getOperand(1).getImm() != 63)
+        return false;
+
+      DEBUG(dbgs() << "  Replacing instructions:\n    ");
+      DEBUG(DefMI.print(dbgs()));
+      DEBUG(dbgs() << "    ");
+      DEBUG(MI.print(dbgs()));
+
+      NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+      NewBr = convertToCondBr(MI);
+      break;
+    }
+    }
+    break;
+  }
+  assert(NewCmp && NewBr && "Expected new instructions.");
+
+  DEBUG(dbgs() << "  with instruction:\n    ");
+  DEBUG(NewCmp->print(dbgs()));
+  DEBUG(dbgs() << "    ");
+  DEBUG(NewBr->print(dbgs()));
+
+  // If this was a flag-setting version of the instruction, we reuse the
+  // original instruction by just clearing the dead flag on the implicit-def
+  // of NZCV. Therefore, we should not erase this instruction.
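+  // Otherwise, convertToFlagSetting() built a brand new flag-setting
+  // instruction, so the original non-flag-setting DefMI is now dead.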
+  if (!IsFlagSetting)
+    DefMI.eraseFromParent();
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(*MF.getFunction()))
+    return false;
+
+  DEBUG(dbgs() << "********** AArch64 Conditional Branch Tuning **********\n"
+               << "********** Function: " << MF.getName() << '\n');
+
+  TII = MF.getSubtarget().getInstrInfo();
+  TRI = MF.getSubtarget().getRegisterInfo();
+  MRI = &MF.getRegInfo();
+
+  bool Changed = false;
+  for (auto &MBB : MF) {
+    bool LocalChange = false;
+    for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(),
+                                     E = MBB.end();
+         I != E; ++I) {
+      MachineInstr &MI = *I;
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case AArch64::CBZW:
+      case AArch64::CBZX:
+      case AArch64::CBNZW:
+      case AArch64::CBNZX:
+      case AArch64::TBZW:
+      case AArch64::TBZX:
+      case AArch64::TBNZW:
+      case AArch64::TBNZX: {
+        MachineInstr *DefMI = getOperandDef(MI.getOperand(0));
+        LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI));
+        break;
+      }
+      }
+      // If the optimization was successful, we can't optimize any other
+      // branches because doing so would clobber the NZCV flags.
+      if (LocalChange) {
+        Changed = true;
+        break;
+      }
+    }
+  }
+  return Changed;
+}
+
+FunctionPass *llvm::createAArch64CondBrTuning() {
+  return new AArch64CondBrTuning();
+}
Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -47,6 +47,11 @@
                            cl::desc("Enable the CCMP formation pass"),
                            cl::init(true), cl::Hidden);
 
+static cl::opt<bool>
+    EnableCondBrTuning("aarch64-enable-cond-br-tune",
+                       cl::desc("Enable the conditional branch tuning pass"),
+                       cl::init(true), cl::Hidden);
+
 static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
                                cl::desc("Enable the machine combiner pass"),
                                cl::init(true), cl::Hidden);
@@ -429,6 +434,8 @@
     addPass(createAArch64ConditionalCompares());
   if (EnableMCR)
     addPass(&MachineCombinerID);
+  if (EnableCondBrTuning)
+    addPass(createAArch64CondBrTuning());
   if (EnableEarlyIfConversion)
     addPass(&EarlyIfConverterID);
   if (EnableStPairSuppress)
Index: lib/Target/AArch64/CMakeLists.txt
===================================================================
--- lib/Target/AArch64/CMakeLists.txt
+++ lib/Target/AArch64/CMakeLists.txt
@@ -43,6 +43,7 @@
   AArch64AsmPrinter.cpp
   AArch64CleanupLocalDynamicTLSPass.cpp
   AArch64CollectLOH.cpp
+  AArch64CondBrTuning.cpp
   AArch64ConditionalCompares.cpp
   AArch64DeadRegisterDefinitionsPass.cpp
   AArch64ExpandPseudoInsts.cpp
Index: test/CodeGen/AArch64/arm64-early-ifcvt.ll
===================================================================
--- test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -27,7 +27,7 @@
 do.cond:
   %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ]
   %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ]
-; CHECK: cbnz
+; CHECK: b.ne
   %dec = add i32 %n.addr.0, -1
   %tobool = icmp eq i32 %dec, 0
   br i1 %tobool, label %do.end, label %do.body
Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll
===================================================================
--- test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -78,9 +78,9 @@
 ; Next BB.
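+; Note: the cond-br-tuning pass is expected to fold the compare into the
+; decrement in this loop, so the loop now ends in subs/b.ne instead of
+; sub/cbnz (and likewise in the hunks below).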
; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body ; CHECK: bl _something -; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: subs [[IV]], [[IV]], #1 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: cbnz [[IV]], [[LOOP]] +; CHECK-NEXT: b.ne [[LOOP]] ; ; Next BB. ; Copy SUM into the returned register + << 3. @@ -144,9 +144,9 @@ ; Next BB. ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; CHECK: bl _something -; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: subs [[IV]], [[IV]], #1 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; CHECK-NEXT: b.ne [[LOOP_LABEL]] ; Next BB. ; CHECK: ; %for.end ; CHECK: mov w0, [[SUM]] @@ -188,9 +188,9 @@ ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; CHECK: bl _something -; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: subs [[IV]], [[IV]], #1 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; CHECK-NEXT: b.ne [[LOOP_LABEL]] ; Next BB. ; CHECK: bl _somethingElse ; CHECK-NEXT: lsl w0, [[SUM]], #3 @@ -259,9 +259,9 @@ ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; CHECK: bl _something -; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: subs [[IV]], [[IV]], #1 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; CHECK-NEXT: b.ne [[LOOP_LABEL]] ; Next BB. ; CHECK: lsl w0, [[SUM]], #3 ; @@ -343,9 +343,9 @@ ; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8 ; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8] ; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]] -; CHECK-NEXT: sub w1, w1, #1 +; CHECK-NEXT: subs w1, w1, #1 ; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]] -; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]] +; CHECK-NEXT: b.ne [[LOOP_LABEL]] ; CHECK-NEXT: [[IFEND_LABEL]]: ; Epilogue code. ; CHECK: add sp, sp, #16 @@ -409,9 +409,9 @@ ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; Inline asm statement. -; CHECK: sub [[IV]], [[IV]], #1 +; CHECK: subs [[IV]], [[IV]], #1 ; CHECK: add x19, x19, #1 -; CHECK: cbnz [[IV]], [[LOOP_LABEL]] +; CHECK: b.ne [[LOOP_LABEL]] ; Next BB. ; CHECK: mov w0, wzr ; Epilogue code. Index: test/CodeGen/AArch64/cond-br-tuning.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/cond-br-tuning.ll @@ -0,0 +1,169 @@ +; RUN: llc < %s -O3 -mtriple=aarch64-eabi -verify-machineinstrs | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linaro-linux-gnueabi" + +; CMN is an alias of ADDS. 
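+; That is, "cmn w0, w1" is "adds wzr, w0, w1": it sets NZCV from w0 + w1
+; without tying up a register for the unused result.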
+; CHECK-LABEL: test_add_cbz:
+; CHECK: cmn w0, w1
+; CHECK: b.eq
+; CHECK: ret
+define void @test_add_cbz(i32 %a, i32 %b, i32* %ptr) {
+  %c = add nsw i32 %a, %b
+  %d = icmp ne i32 %c, 0
+  br i1 %d, label %L1, label %L2
+L1:
+  store i32 0, i32* %ptr, align 4
+  ret void
+L2:
+  store i32 1, i32* %ptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_add_cbz_multiple_use:
+; CHECK: adds
+; CHECK: b.eq
+; CHECK: ret
+define void @test_add_cbz_multiple_use(i32 %a, i32 %b, i32* %ptr) {
+  %c = add nsw i32 %a, %b
+  %d = icmp ne i32 %c, 0
+  br i1 %d, label %L1, label %L2
+L1:
+  store i32 0, i32* %ptr, align 4
+  ret void
+L2:
+  store i32 %c, i32* %ptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_add_cbz_64:
+; CHECK: cmn x0, x1
+; CHECK: b.eq
+define void @test_add_cbz_64(i64 %a, i64 %b, i64* %ptr) {
+  %c = add nsw i64 %a, %b
+  %d = icmp ne i64 %c, 0
+  br i1 %d, label %L1, label %L2
+L1:
+  store i64 0, i64* %ptr, align 4
+  ret void
+L2:
+  store i64 1, i64* %ptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_and_cbz:
+; CHECK: tst w0, #0x6
+; CHECK: b.eq
+define void @test_and_cbz(i32 %a, i32* %ptr) {
+  %c = and i32 %a, 6
+  %d = icmp ne i32 %c, 0
+  br i1 %d, label %L1, label %L2
+L1:
+  store i32 0, i32* %ptr, align 4
+  ret void
+L2:
+  store i32 1, i32* %ptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_bic_cbnz:
+; CHECK: bics wzr, w1, w0
+; CHECK: b.ne
+define void @test_bic_cbnz(i32 %a, i32 %b, i32* %ptr) {
+  %c = and i32 %a, %b
+  %d = icmp eq i32 %c, %b
+  br i1 %d, label %L1, label %L2
+L1:
+  store i32 0, i32* %ptr, align 4
+  ret void
+L2:
+  store i32 1, i32* %ptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_add_tbz:
+; CHECK: adds
+; CHECK: b.ge
+; CHECK: ret
+define void @test_add_tbz(i32 %a, i32 %b, i32* %ptr) {
+entry:
+  %add = add nsw i32 %a, %b
+  %cmp36 = icmp sge i32 %add, 0
+  br i1 %cmp36, label %L2, label %L1
+L1:
+  store i32 %add, i32* %ptr, align 8
+  br label %L2
+L2:
+  ret void
+}
+
+; CHECK-LABEL: test_subs_tbz:
+; CHECK: subs
+; CHECK: b.ge
+; CHECK: ret
+define void @test_subs_tbz(i32 %a, i32 %b, i32* %ptr) {
+entry:
+  %sub = sub nsw i32 %a, %b
+  %cmp36 = icmp sge i32 %sub, 0
+  br i1 %cmp36, label %L2, label %L1
+L1:
+  store i32 %sub, i32* %ptr, align 8
+  br label %L2
+L2:
+  ret void
+}
+
+; CHECK-LABEL: test_add_tbnz:
+; CHECK: adds
+; CHECK: b.lt
+; CHECK: ret
+define void @test_add_tbnz(i32 %a, i32 %b, i32* %ptr) {
+entry:
+  %add = add nsw i32 %a, %b
+  %cmp36 = icmp slt i32 %add, 0
+  br i1 %cmp36, label %L2, label %L1
+L1:
+  store i32 %add, i32* %ptr, align 8
+  br label %L2
+L2:
+  ret void
+}
+
+; CHECK-LABEL: test_subs_tbnz:
+; CHECK: subs
+; CHECK: b.lt
+; CHECK: ret
+define void @test_subs_tbnz(i32 %a, i32 %b, i32* %ptr) {
+entry:
+  %sub = sub nsw i32 %a, %b
+  %cmp36 = icmp slt i32 %sub, 0
+  br i1 %cmp36, label %L2, label %L1
+L1:
+  store i32 %sub, i32* %ptr, align 8
+  br label %L2
+L2:
+  ret void
+}
+
+declare void @foo()
+declare void @bar(i32)
+
+; Don't transform since the call will clobber the NZCV bits.
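+; The "bl bar" sits between the AND and the conditional branch, and calls
+; are assumed to clobber NZCV, so folding the compare into the AND would be
+; unsafe here and the cbnz must remain.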
+; CHECK-LABEL: test_call_clobber: +; CHECK: and w[[DST:[0-9]+]], w1, #0x6 +; CHECK: bl bar +; CHECK: cbnz w[[DST]] +define void @test_call_clobber(i32 %unused, i32 %a) { +entry: + %c = and i32 %a, 6 + call void @bar(i32 %c) + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: + tail call void @foo() + unreachable + +if.end: + ret void +} Index: test/CodeGen/AArch64/misched-fusion.ll =================================================================== --- test/CodeGen/AArch64/misched-fusion.ll +++ test/CodeGen/AArch64/misched-fusion.ll @@ -7,8 +7,8 @@ ; Make sure sub is scheduled in front of cbnz ; CHECK-LABEL: test_sub_cbz: -; CHECK: sub w[[SUBRES:[0-9]+]], w0, #13 -; CHECK-NEXT: cbnz w[[SUBRES]], {{.?LBB[0-9_]+}} +; CHECK: subs w[[SUBRES:[0-9]+]], w0, #13 +; CHECK: b.ne {{.?LBB[0-9_]+}} define void @test_sub_cbz(i32 %a0, i32 %a1) { entry: ; except for the fusion opportunity the sub/add should be equal so the Index: test/CodeGen/AArch64/stack-guard-remat-bitcast.ll =================================================================== --- test/CodeGen/AArch64/stack-guard-remat-bitcast.ll +++ test/CodeGen/AArch64/stack-guard-remat-bitcast.ll @@ -13,8 +13,8 @@ ; CHECK: ldur [[R3:x[0-9]+]], {{\[}}x29, [[SLOT0]]{{\]}} ; CHECK: ldr [[GUARD_ADDR:x[0-9]+]], {{\[}}[[GUARD_PAGE]], ___stack_chk_guard@GOTPAGEOFF{{\]}} ; CHECK: ldr [[GUARD:x[0-9]+]], {{\[}}[[GUARD_ADDR]]{{\]}} -; CHECK: sub [[R4:x[0-9]+]], [[GUARD]], [[R3]] -; CHECK: cbnz [[R4]], LBB +; CHECK: cmp [[GUARD]], [[R3]] +; CHECK: b.ne LBB define i32 @test_stack_guard_remat2() { entry: Index: test/CodeGen/AArch64/tbz-tbnz.ll =================================================================== --- test/CodeGen/AArch64/tbz-tbnz.ll +++ test/CodeGen/AArch64/tbz-tbnz.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O1 -mtriple=aarch64-eabi | FileCheck %s +; RUN: llc < %s -O1 -mtriple=aarch64-eabi -aarch64-enable-cond-br-tune=false | FileCheck %s declare void @t() Index: test/CodeGen/AArch64/thread-pointer.ll =================================================================== --- test/CodeGen/AArch64/thread-pointer.ll +++ test/CodeGen/AArch64/thread-pointer.ll @@ -13,8 +13,8 @@ ; CHECK: .LBB0_2: ; CHECK: ldr w0, [x[[REG2]]] ; CHECK: bl bar -; CHECK: sub w[[REG3:[0-9]+]], w{{[0-9]+}}, #1 -; CHECK: cbnz w[[REG3]], .LBB0_2 +; CHECK: subs w[[REG3:[0-9]+]], w{{[0-9]+}}, #1 +; CHECK: b.ne .LBB0_2 define void @test1(i32 %n) local_unnamed_addr { entry: