diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -335,6 +335,8 @@
                         ArrayRef<MachineOperand> Cond) const;
   bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
                            const MachineRegisterInfo *MRI) const;
+  bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg,
+                            int CmpValue, const MachineRegisterInfo *MRI) const;
 
   /// Returns an unused general-purpose register which can be used for
   /// constructing an outlined call if one exists. Returns 0 otherwise.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1462,14 +1462,16 @@
   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
   // function.
   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
-  if (CmpValue != 0 || SrcReg2 != 0)
+  if (SrcReg2 != 0)
     return false;
 
   // CmpInstr is a Compare instruction if destination register is not used.
   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
     return false;
 
-  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
+  if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, MRI))
+    return true;
+  return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, MRI);
 }
 
 /// Get opcode of S version of Instr.
@@ -1530,6 +1532,34 @@
   return false;
 }
 
+/// Find the operand index of the condition-code immediate in \p Instr, or -1
+/// if \p Instr is not a supported conditional instruction.
+static int findCondCodeUseOperandIdx(const MachineInstr &Instr) {
+  switch (Instr.getOpcode()) {
+  default:
+    return -1;
+
+  case AArch64::Bcc: {
+    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+    assert(Idx >= 2);
+    return Idx - 2;
+  }
+
+  case AArch64::CSINVWr:
+  case AArch64::CSINVXr:
+  case AArch64::CSINCWr:
+  case AArch64::CSINCXr:
+  case AArch64::CSELWr:
+  case AArch64::CSELXr:
+  case AArch64::CSNEGWr:
+  case AArch64::CSNEGXr:
+  case AArch64::FCSELSrrr:
+  case AArch64::FCSELDrrr: {
+    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+    assert(Idx >= 1);
+    return Idx - 1;
+  }
+  }
+}
+
 namespace {
 
 struct UsedNZCV {
@@ -1555,31 +1585,10 @@
 /// Returns AArch64CC::Invalid if either the instruction does not use condition
 /// codes or we don't optimize CmpInstr in the presence of such instructions.
 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
-  switch (Instr.getOpcode()) {
-  default:
-    return AArch64CC::Invalid;
-
-  case AArch64::Bcc: {
-    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
-    assert(Idx >= 2);
-    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
-  }
-
-  case AArch64::CSINVWr:
-  case AArch64::CSINVXr:
-  case AArch64::CSINCWr:
-  case AArch64::CSINCXr:
-  case AArch64::CSELWr:
-  case AArch64::CSELXr:
-  case AArch64::CSNEGWr:
-  case AArch64::CSNEGXr:
-  case AArch64::FCSELSrrr:
-  case AArch64::FCSELDrrr: {
-    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
-    assert(Idx >= 1);
-    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
-  }
-  }
+  int CCIdx = findCondCodeUseOperandIdx(Instr);
+  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
+                          Instr.getOperand(CCIdx).getImm())
+                    : AArch64CC::Invalid;
 }
 
 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
@@ -1626,6 +1635,37 @@
   return UsedFlags;
 }
 
+/// Check that the NZCV flags produced by \p CmpInstr are only consumed inside
+/// its basic block by supported conditional instructions that do not use the C
+/// or V flags. On success, optionally report the union of N/Z/C/V flags read
+/// after \p CmpInstr through \p NzcvUse, and collect the consuming
+/// instructions in \p CCUseInstrs.
+static bool
+examineCFlagsUse(MachineInstr *MI, MachineInstr *CmpInstr,
+                 const TargetRegisterInfo *TRI, UsedNZCV *NzcvUse = nullptr,
+                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
+  if (MI->getParent() != CmpInstr->getParent())
+    return false;
+
+  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
+    return false;
+
+  UsedNZCV NZCVUsedAfterCmp;
+  for (MachineInstr &Instr :
+       instructionsWithoutDebug(std::next(CmpInstr->getIterator()),
+                                CmpInstr->getParent()->instr_end())) {
+    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
+      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
+      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
+        return false;
+      NZCVUsedAfterCmp |= getUsedNZCV(CC);
+      if (CCUseInstrs)
+        CCUseInstrs->push_back(&Instr);
+    }
+    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
+      break;
+  }
+
+  // Write through the out-parameter; assigning the pointer itself would leave
+  // the caller's UsedNZCV default-constructed and defeat its N/Z checks.
+  if (NzcvUse)
+    *NzcvUse = NZCVUsedAfterCmp;
+  return (!NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V);
+}
+
 static bool isADDSRegImm(unsigned Opcode) {
   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
 }
@@ -1655,34 +1695,13 @@
   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
     return false;
 
-  if (MI->getParent() != CmpInstr->getParent())
-    return false;
-
-  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
+  if (!examineCFlagsUse(MI, CmpInstr, TRI))
     return false;
 
   AccessKind AccessToCheck = AK_Write;
   if (sForm(*MI) != MI->getOpcode())
     AccessToCheck = AK_All;
-  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
-    return false;
-
-  UsedNZCV NZCVUsedAfterCmp;
-  for (const MachineInstr &Instr :
-       instructionsWithoutDebug(std::next(CmpInstr->getIterator()),
-                                CmpInstr->getParent()->instr_end())) {
-    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
-      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
-      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
-        return false;
-      NZCVUsedAfterCmp |= getUsedNZCV(CC);
-    }
-
-    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
-      break;
-  }
-
-  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
+  return !areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck);
 }
 
 /// Substitute an instruction comparing to zero with another instruction
@@ -1717,6 +1736,137 @@
   return true;
 }
 
+/// Check if CmpInstr can be removed and condition code used after must be
+/// inverted.
+///
+/// CmpInstr can be removed if:
+/// - CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0',
+///   MI is 'CSINCWr %vreg, wzr, wzr, eq/ne' or 'CSINCXr %vreg, xzr, xzr, eq/ne'
+/// - or, CmpInstr is 'SUBS %vreg, 1',
+///   MI is 'CSINCWr %vreg, wzr, wzr, eq/ne/mi/pl'
+///   or 'CSINCXr %vreg, xzr, xzr, eq/ne/mi/pl'
+/// - and, MI and CmpInstr are from the same MachineBB
+/// - and, condition flags are defined before MI
+/// - and, condition flags are not alive in successors of the CmpInstr parent
+/// - and, there must be no defs of flags between MI and CmpInstr
+/// - and Z or N are the only flag used after CmpInstr and corresponds to the
+///   flag used in MI
+///
+/// Condition code must be inverted if:
+/// - MI is 'CSINCWr %vreg, wzr, wzr, ne' or 'CSINCXr %vreg, xzr, xzr, ne',
+///   CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
+/// - or, MI is 'CSINCWr %vreg, wzr, wzr, eq/pl' or
+///   'CSINCXr %vreg, xzr, xzr, eq/pl', CmpInstr is 'SUBS %vreg, 1'
+static bool canCmpInstrBeRemoved(MachineInstr *MI, MachineInstr *CmpInstr,
+                                 int CmpValue, const TargetRegisterInfo *TRI,
+                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
+                                 bool &IsInvertCC) {
+  assert(MI);
+  assert(CmpInstr);
+  assert(CmpValue == 0 || CmpValue == 1);
+
+  // MI must be 'CSINC %vreg, zr, zr, cc', i.e. a materialized 0/1 value.
+  unsigned MIOpc = MI->getOpcode();
+  if (MIOpc == AArch64::CSINCWr) {
+    if (MI->getOperand(1).getReg() != AArch64::WZR ||
+        MI->getOperand(2).getReg() != AArch64::WZR)
+      return false;
+  } else if (MIOpc == AArch64::CSINCXr) {
+    if (MI->getOperand(1).getReg() != AArch64::XZR ||
+        MI->getOperand(2).getReg() != AArch64::XZR)
+      return false;
+  } else {
+    return false;
+  }
+
+  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(*MI);
+  if (MICC == AArch64CC::Invalid)
+    return false;
+
+  // NZCV needs to be defined
+  if (MI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
+    return false;
+
+  // 'SUBS %vreg, 1' for CmpValue == 1; ADDS/SUBS with 0 for CmpValue == 0.
+  const unsigned CmpOpcode = CmpInstr->getOpcode();
+  if (CmpValue && !isSUBSRegImm(CmpOpcode))
+    return false;
+  if (!CmpValue && !isSUBSRegImm(CmpOpcode) && !isADDSRegImm(CmpOpcode))
+    return false;
+
+  UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
+  if (MIUsedNZCV.C || MIUsedNZCV.V)
+    return false;
+  UsedNZCV NZCVUsedAfterCmp;
+  if (!examineCFlagsUse(MI, CmpInstr, TRI, &NZCVUsedAfterCmp, &CCUseInstrs))
+    return false;
+  // The flag consumed after CmpInstr must correspond to the flag used in MI.
+  if ((MIUsedNZCV.Z && NZCVUsedAfterCmp.N) ||
+      (MIUsedNZCV.N && NZCVUsedAfterCmp.Z))
+    return false;
+  // mi/pl in MI only combine with a compare against 1.
+  if (MIUsedNZCV.N && !CmpValue)
+    return false;
+
+  // There must be no defs of flags between MI and CmpInstr.
+  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AK_Write))
+    return false;
+
+  IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
+               (!CmpValue && MICC == AArch64CC::NE);
+  return true;
+}
+
+/// Remove comparison in csinc-cmp sequence
+///
+/// Examples:
+/// 1. \code
+///   csinc w9, wzr, wzr, ne
+///   cmp   w9, #0
+///   b.eq
+///    \endcode
+/// to
+///    \code
+///   csinc w9, wzr, wzr, ne
+///   b.ne
+///    \endcode
+///
+/// 2. \code
+///   csinc x2, xzr, xzr, mi
+///   cmp   x2, #1
+///   b.pl
+///    \endcode
+/// to
+///    \code
+///   csinc x2, xzr, xzr, mi
+///   b.pl
+///    \endcode
+///
+/// \param CmpInstr comparison instruction
+/// \return True when comparison removed
+bool AArch64InstrInfo::removeCmpToZeroOrOne(
+    MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
+    const MachineRegisterInfo *MRI) const {
+  assert(MRI);
+  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+  if (!MI)
+    return false;
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  SmallVector<MachineInstr *, 4> CCUseInstrs;
+  bool IsInvertCC = false;
+  if (!canCmpInstrBeRemoved(MI, &CmpInstr, CmpValue, TRI, CCUseInstrs,
+                            IsInvertCC))
+    return false;
+  // Make transformation
+  CmpInstr.eraseFromParent();
+  if (IsInvertCC)
+    // Invert condition codes in CmpInstr CC users
+    for (MachineInstr *CCUseInstr : CCUseInstrs) {
+      int Idx = findCondCodeUseOperandIdx(*CCUseInstr);
+      assert(Idx >= 0);
+      MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
+      AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
+          (AArch64CC::CondCode)CCOperand.getImm());
+      CCOperand.setImm(CCUse);
+    }
+  return true;
+}
+
 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
       MI.getOpcode() != AArch64::CATCHRET)
diff --git a/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir b/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
@@ -0,0 +1,192 @@
+# RUN: llc -mtriple=aarch64 -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: remove_subswr_after_csincwr
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: remove_subswr_after_csincwr
+  ; CHECK-NOT: SUBSWri
+  ; CHECK: Bcc 1
+  bb.0:
+    liveins: $x1
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr64common = COPY $x1
+    %2:gpr64 = IMPLICIT_DEF
+    %3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv
+    %4:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    %5:gpr32 = SUBSWri killed %4:gpr32common, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
+---
+name: remove_subsxr_after_csincxr_invertcc
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: remove_subsxr_after_csincxr_invertcc
+  ; CHECK-NOT: SUBSXri
+  ; CHECK: Bcc 1
+  bb.0:
+    liveins: $x1
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr64common = COPY $x1
+    %2:gpr64 = IMPLICIT_DEF
+    %3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv
+    %4:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+    %5:gpr64 = SUBSXri killed %4:gpr64common, 0, 0, implicit-def $nzcv
+    Bcc 0, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
+---
+name: cflags_alive_in_succs
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cflags_alive_in_succs
+  ; CHECK: ADDSXri
+  ; CHECK: Bcc 0
+  bb.0:
+    liveins: $nzcv
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+    %2:gpr64 = ADDSXri killed %1:gpr64common, 0, 0, implicit-def $nzcv
+    Bcc 0, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    liveins: $nzcv
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
+---
+name: remove_addswr_after_csincwr_invertcc
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: remove_addswr_after_csincwr_invertcc
+  ; CHECK-NOT: ADDSWri
+  ; CHECK: Bcc 0
+  bb.0:
+    liveins: $nzcv
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    %2:gpr32 = ADDSWri killed %1:gpr32common, 0, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
+---
+name: cv_used_after_cmp
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cv_used_after_cmp
+  ; CHECK: SUBSWri
+  ; CHECK: Bcc 2
+  bb.0:
+    liveins: $nzcv
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    %2:gpr32 = SUBSWri killed %1:gpr32common, 0, 0, implicit-def $nzcv
+    Bcc 2, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
+---
+name: csinc_add0
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: csinc_add0
+  ; CHECK: ADDSWri
+  ; CHECK: Bcc 1
+  bb.0:
+    liveins: $nzcv
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr32common = CSINCWr $wzr, $wzr, 4, implicit $nzcv
+    %2:gpr32 = ADDSWri killed %1:gpr32common, 0, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
+---
+name: remove_subs1_after_csincN_invertcc
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: remove_subs1_after_csincN_invertcc
+  ; CHECK-NOT: SUBSWri
+  ; CHECK: Bcc 5
+  bb.0:
+    liveins: $nzcv
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr32common = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    %2:gpr32 = SUBSWri killed %1:gpr32common, 1, 0, implicit-def $nzcv
+    Bcc 4, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
+---
+name: csinc_wrong_cc
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: csinc_wrong_cc
+  ; CHECK: SUBSXri
+  ; CHECK: Bcc 0
+  bb.0:
+    liveins: $nzcv
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr64common = CSINCXr $xzr, $xzr, 2, implicit $nzcv
+    %2:gpr64 = SUBSXri killed %1:gpr64common, 1, 0, implicit-def $nzcv
+    Bcc 0, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -189,8 +189,6 @@
 ; CHECK-CVT-DAG:  fcvt s1, h1
 ; CHECK-CVT-DAG:  fcvt s0, h0
 ; CHECK-CVT-DAG:  fcmp s2, s3
-; CHECK-CVT-DAG:  cset [[CC:w[0-9]+]], ne
-; CHECK-CVT-DAG:  cmp [[CC]], #0
 ; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne
 ; CHECK-CVT-NEXT: fcvt h0, s0
 ; CHECK-CVT-NEXT: ret
@@ -228,8 +226,6 @@
 ; CHECK-CVT-DAG:  fcvt s0, h0
 ; CHECK-CVT-DAG:  fcvt s1, h1
 ; CHECK-CVT-DAG:  fcmp s2, s3
-; CHECK-CVT-DAG:  cset w8, ne
-; CHECK-CVT-NEXT: cmp w8, #0
 ; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne
 ; CHECK-CVT-NEXT: fcvt h0, s0
 ; CHECK-CVT-NEXT: ret