Index: llvm/lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -297,6 +297,27 @@
   /// Returns an unused general-purpose register which can be used for
   /// constructing an outlined call if one exists. Returns 0 otherwise.
   unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+
+  /// Merge an AND and a following TST (ANDS) in \p MBB into one ANDS when:
+  ///   1. AND and TST have the same source operands (in either order)
+  ///   2. Flag register (defined by TST) is not used between AND and TST
+  ///   3. The register written by TST is not read later in \p MBB
+  ///   NB: TST is an alias for ANDS with xzr/wzr as the destination
+  ///
+  /// Example:
+  /// \code
+  ///   and x3, x2, x1
+  ///   tst x2, x1
+  /// \endcode
+  /// to
+  /// \code
+  ///   ands x3, x2, x1
+  /// \endcode
+  ///
+  /// \param MBB Basic block whose instructions are checked (and merged if
+  ///            suitable)
+  /// \return True when the instructions were merged
+  bool mergeAndAnds(MachineBasicBlock& MBB) const;
 };
 
 /// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -54,6 +55,9 @@
 #define GET_INSTRINFO_CTOR_DTOR
 #include "AArch64GenInstrInfo.inc"
 
+#define DEBUG_TYPE "aarch64-instr-info"
+STATISTIC(NumOfMergesTstAnd, "Number of merges of AND and TST instructions");
+
 static cl::opt TBZDisplacementBits(
     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
@@ -5189,6 +5193,10 @@
 /// \return True when the simple
conditional branch is generated
 ///
 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
+  // First try to merge an AND/TST pair in MI's block; report success if so.
+  if (mergeAndAnds(*MI.getParent()))
+    return true;
+
   bool IsNegativeBranch = false;
   bool IsTestAndBranch = false;
   unsigned TargetBBInMI = 0;
@@ -5325,6 +5333,120 @@
   }
 }
 
+// Helper functions for mergeAndAnds
+
+/// Return the first instruction in [IterBegin, IterEnd) that reads \p Reg, or
+/// IterEnd if no instruction in that range reads it.
+static MachineBasicBlock::iterator
+FindInstrWithReg(const MachineBasicBlock::iterator IterBegin,
+                 const MachineBasicBlock::iterator IterEnd, const Register Reg,
+                 const TargetRegisterInfo &TRI) {
+  return std::find_if(IterBegin, IterEnd, [Reg, &TRI](const auto &MI) {
+    return MI.readsRegister(Reg, &TRI);
+  });
+}
+
+/// Return true if any instruction in [IterBegin, IterEnd) reads or writes
+/// \p Reg.
+static bool IsRegTouchedInRange(const MachineBasicBlock::iterator IterBegin,
+                                const MachineBasicBlock::iterator IterEnd,
+                                const Register Reg,
+                                const TargetRegisterInfo &TRI) {
+  return std::any_of(IterBegin, IterEnd, [Reg, &TRI](const auto &MI) {
+    return MI.readsRegister(Reg, &TRI) || MI.modifiesRegister(Reg, &TRI);
+  });
+}
+
+/// Return true if \p FirstInstr and \p SecondInstr have the same two source
+/// registers (operands 1 and 2), in either order.
+static bool HasSameOperands(const MachineInstr &FirstInstr,
+                            const MachineInstr &SecondInstr) {
+  const Register FirstLHS = FirstInstr.getOperand(1).getReg();
+  const Register FirstRHS = FirstInstr.getOperand(2).getReg();
+  const Register SecondLHS = SecondInstr.getOperand(1).getReg();
+  const Register SecondRHS = SecondInstr.getOperand(2).getReg();
+
+  return (FirstLHS == SecondLHS && FirstRHS == SecondRHS) ||
+         (FirstLHS == SecondRHS && FirstRHS == SecondLHS);
+}
+
+/// Fuse an AND with a later ANDS on the same source registers in \p MBB into a
+/// single ANDS placed where the AND was. At most one pair is merged per call.
+///
+/// A pair is merged only when no later instruction in \p MBB reads the
+/// register defined by the ANDS, and no instruction between the two reads or
+/// writes the flag register: the flag definition is hoisted up to the AND, so
+/// an intervening reader would see the new flags and an intervening writer
+/// would clobber them before the users that follow the original ANDS.
+///
+/// NOTE(review): uses of the ANDS result are only searched for inside \p MBB;
+/// uses in other blocks are not checked here -- confirm that callers cannot
+/// reach this with a live-out result.
+///
+/// \return true if a pair was merged (both original instructions are erased).
+bool AArch64InstrInfo::mergeAndAnds(MachineBasicBlock &MBB) const {
+  const TargetRegisterInfo &TRI = getRegisterInfo();
+  const MachineBasicBlock::iterator EndIter = MBB.end();
+  // Most recent AND that may still be merged; EndIter means "no candidate".
+  MachineBasicBlock::iterator AndIter = EndIter;
+
+  for (MachineBasicBlock::iterator CurIter = MBB.begin(); CurIter != EndIter;
+       ++CurIter) {
+    const unsigned Opcode = CurIter->getOpcode();
+    if (Opcode == AArch64::ANDXrr || Opcode == AArch64::ANDWrr) {
+      assert(CurIter->getNumOperands() == 3 &&
+             "Unexpected number of operands in ANDXrr");
+      AndIter = CurIter;
+      continue;
+    }
+
+    if ((Opcode != AArch64::ANDSXrr && Opcode != AArch64::ANDSWrr) ||
+        AndIter == EndIter)
+      continue;
+    assert(CurIter->getNumOperands() == 4 &&
+           "Unexpected number of operands in ANDSXrr");
+
+    // Operands differ: not a candidate for merge.
+    if (!HasSameOperands(*AndIter, *CurIter))
+      continue;
+
+    // The merged instruction defines the AND's destination only, so the
+    // register written by the ANDS must have no readers after it.
+    const MachineOperand &AndsDef = CurIter->getOperand(0);
+    assert(AndsDef.isReg() && "Return value should be stored in register");
+    if (FindInstrWithReg(std::next(CurIter), EndIter, AndsDef.getReg(), TRI) !=
+        EndIter)
+      continue;
+
+    // Flags read or rewritten between the AND and the ANDS make hoisting the
+    // flag definition unsafe. The AND can no longer pair with any later ANDS
+    // either, so drop it.
+    const MachineOperand &FlagOp = CurIter->getOperand(3);
+    assert(FlagOp.isReg() &&
+           "Third operand in ANDS instruction can be register only");
+    if (IsRegTouchedInRange(std::next(AndIter), CurIter, FlagOp.getReg(),
+                            TRI)) {
+      AndIter = EndIter;
+      continue;
+    }
+
+    // All conditions are satisfied: emit ANDS with the AND's destination and
+    // sources in place of the AND, then remove both originals.
+    MachineInstr &AndMI = *AndIter;
+    MachineInstr &AndsMI = *CurIter;
+    const DebugLoc DL = AndMI.getDebugLoc();
+    BuildMI(MBB, AndIter, DL, AndsMI.getDesc(), AndMI.getOperand(0).getReg())
+        .addReg(AndMI.getOperand(1).getReg())
+        .addReg(AndMI.getOperand(2).getReg());
+    AndMI.eraseFromParent();
+    AndsMI.eraseFromParent();
+    ++NumOfMergesTstAnd;
+    return true;
+  }
+
+  return false;
+}
+
 std::pair
 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
   const unsigned Mask = AArch64II::MO_FRAGMENT;
Index: llvm/test/CodeGen/AArch64/peephole-and-tst.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/peephole-and-tst.ll
@@ -0,0 +1,76 @@
+; RUN: llc -mtriple=aarch64-linux-gnu %s -o %t; FileCheck %s --input-file=%t
+
+%struct.anon = type { i32*, i32* }
+
+@ptr_wrapper = common dso_local local_unnamed_addr global %struct.anon* null, align 8
+; i32 case: the loop body ANDs %bit.addr.0 with %in and compares the result with zero; an `ands` on w registers is expected after the .LBB0_2 label.
+define dso_local i32 @test_func_i32(i32 %in, i32 %bit, i32 %mask) local_unnamed_addr {
+entry:
+  %0 = load %struct.anon*, %struct.anon** @ptr_wrapper, align 8
+  %result = getelementptr inbounds %struct.anon, %struct.anon* %0, i64 0, i32 1
+  %tobool2 = icmp ne i32 %mask, 0
+  br label %do.body
+
+do.body:                                          ; preds = %4, %entry
+; CHECK-LABEL: test_func_i32:
+; CHECK-LABEL: .LBB0_2:
+; CHECK: ands [[DSTREG:w[0-9]+]], [[SRCREG1:w[0-9]+]], [[SRCREG2:w[0-9]+]]
+  %bit.addr.0 = phi i32 [ %bit, %entry ], [ %shl, %4 ]
+  %retval1.0 = phi i32 [ 0, %entry ], [ %retval1.1, %4 ]
+  %and = and i32 %bit.addr.0, %in
+  %tobool = icmp eq i32 %and, 0
+  %not.tobool = xor i1 %tobool, true
+  %inc = zext i1 %not.tobool to i32
+  %retval1.1 = add nuw nsw i32 %retval1.0, %inc
+  %1 = xor i1 %tobool, true
+  %2 = or i1 %tobool2, %1
+  br i1 %2, label %3, label %4
+
+3:                                                ; preds = %do.body
+  store i32* null, i32** %result, align 8
+  br label %4
+
+4:                                                ; preds = %do.body, %3
+  %shl = shl i32 %bit.addr.0, 1
+  %tobool6 = icmp eq i32 %shl, 0
+  br i1 %tobool6, label %do.end, label %do.body
+
+do.end:                                           ; preds = %4
+  ret i32 %retval1.1
+}
+; i64 case: same loop with 64-bit %in/%bit/%mask; an `ands` on x registers is expected after the .LBB1_2 label.
+; NOTE(review): `#0` on the define below has no matching `attributes #0` group among the 76 lines of this test -- confirm it is intended.
+define dso_local i32 @test_func_i64(i64 %in, i64 %bit, i64 %mask) local_unnamed_addr #0 {
+entry:
+  %0 = load %struct.anon*, %struct.anon** @ptr_wrapper, align 8
+  %result = getelementptr inbounds %struct.anon, %struct.anon* %0, i64 0, i32 1
+  %tobool2 = icmp ne i64 %mask, 0
+  br label %do.body
+
+do.body:                                          ; preds = %4, %entry
+; CHECK-LABEL: test_func_i64:
+; CHECK-LABEL: .LBB1_2:
+; CHECK: ands [[DSTREG:x[0-9]+]], [[SRCREG1:x[0-9]+]], [[SRCREG2:x[0-9]+]]
+  %bit.addr.0 = phi i64 [ %bit, %entry ], [ %shl, %4 ]
+  %retval1.0 = phi i32 [ 0, %entry ], [ %retval1.1, %4 ]
+  %and = and i64 %bit.addr.0, %in
+  %tobool = icmp eq i64 %and, 0
+  %not.tobool = xor i1 %tobool, true
+  %inc = zext i1 %not.tobool to i32
+  %retval1.1 = add nuw nsw i32 %retval1.0, %inc
+  %1 = xor i1 %tobool, true
+  %2 = or i1 %tobool2, %1
+  br i1 %2, label %3, label %4
+
+3:                                                ; preds = %do.body
+  store i32* null, i32** %result, align 8
+  br label %4
+
+4:                                                ; preds = %do.body, %3
+  %shl = shl i64 %bit.addr.0, 1
+  %tobool6 = icmp eq i64 %shl, 0
+  br i1 %tobool6, label %do.end, label %do.body
+
+do.end:                                           ; preds = %4
+  ret i32 %retval1.1
+}