Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -204,6 +204,11 @@
   void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
                              MachineBasicBlock *TBB,
                              ArrayRef<MachineOperand> Cond) const;
+  bool substituteCmpInstr(MachineInstr *CmpInstr,
+                          unsigned SrcReg,
+                          AArch64CC::CondCode NeededCondCode,
+                          const MachineRegisterInfo *MRI) const;
+  bool optimizeCmpAndBrcSeq(MachineInstr *BrcInstr) const;
 };
 
 /// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -22,6 +22,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
 
 using namespace llvm;
 
@@ -790,11 +791,24 @@
   }
 }
 
-/// True when condition code could be modified on the instruction
-/// trace starting at from and ending at to.
-static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
-                                  const bool CheckOnlyCCWrites,
-                                  const TargetRegisterInfo *TRI) {
+enum AccessKind {
+  AK_Write = 0x01,
+  AK_Read = 0x10,
+  AK_All = 0x11
+};
+
+inline bool isAccess(AccessKind Access1, AccessKind Access2) {
+  return Access1 & Access2;
+}
+
+/// True when condition flags are accessed (either by writing or reading)
+/// on the instruction trace starting at From and ending at To.
+///
+/// Note: If From and To are in different blocks, it is assumed the flags are
+/// accessed on the path.
+static bool areCFlagsAccessedBetweenInstrs(MachineInstr *From, MachineInstr *To,
+                                           const TargetRegisterInfo *TRI,
+                                           const AccessKind AccessToCheck = AK_All) {
   // We iterate backward starting \p To until we hit \p From
   MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();
 
@@ -802,36 +816,238 @@
   if (I == B)
     return true;
 
-  // Check whether the definition of SrcReg is in the same basic block as
-  // Compare. If not, assume the condition code gets modified on some path.
+  // Check whether the instructions are in the same basic block.
+  // If not, assume the condition flags might get modified somewhere.
   if (To->getParent() != From->getParent())
     return true;
 
-  // Check that NZCV isn't set on the trace.
+  // From must be above To.
+  assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
+                      To->getParent()->rend(),
+                      [From](MachineInstr &MI) {
+                        return &MI == From;
+                      }) != To->getParent()->rend());
+
   for (--I; I != E; --I) {
     const MachineInstr &Instr = *I;
-    if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
-        (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
-      // This instruction modifies or uses NZCV after the one we want to
-      // change.
+    if ((isAccess(AccessToCheck, AK_Write) && Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
+        (isAccess(AccessToCheck, AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
       return true;
-    if (I == B)
-      // We currently don't allow the instruction trace to cross basic
-      // block boundaries
+  }
+  return false;
+}
+
+/// Check if AArch64::NZCV should be alive in successors of MBB.
+static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
+  for (auto *BB : MBB->successors())
+    if (BB->isLiveIn(AArch64::NZCV))
       return true;
+  return false;
+}
+
+/// Find the first modification of AArch64::NZCV after the specified
+/// instruction.
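+/// Returns the end() iterator of the containing basic block when the flags
+/// are not modified again in that block.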
+static MachineBasicBlock::iterator
+firstModificationOfCondFlagsAfterInstr(MachineInstr *Instr,
+                                        const TargetRegisterInfo *TRI) {
+  assert(Instr);
+  MachineBasicBlock::iterator BeginInstrI = Instr;
+  ++BeginInstrI;
+  return std::find_if(BeginInstrI, Instr->getParent()->end(),
+                      [TRI](MachineInstr &MI) {
+                        return MI.modifiesRegister(AArch64::NZCV, TRI);
+                      });
+}
+
+/// Find a condition code used by the instruction.
+/// Returns AArch64CC::Invalid if either the instruction does not use condition
+/// codes or we don't optimize CmpInstr in the presence of such instructions.
+static AArch64CC::CondCode findCondCodeUsedByInstr(MachineInstr &Instr) {
+  switch (Instr.getOpcode()) {
+  default:
+    return AArch64CC::Invalid;
+
+  case AArch64::Bcc:
+  {
+    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+    assert(Idx >= 2);
+    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
+  }
+
+  case AArch64::CSINVWr:
+  case AArch64::CSINVXr:
+  case AArch64::CSINCWr:
+  case AArch64::CSINCXr:
+  case AArch64::CSELWr:
+  case AArch64::CSELXr:
+  case AArch64::CSNEGWr:
+  case AArch64::CSNEGXr:
+  case AArch64::FCSELSrrr:
+  case AArch64::FCSELDrrr:
+  {
+    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+    assert(Idx >= 1);
+    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
+  }
+  }
+}
+
+/// Get the opcode of the S (flag-setting) version of Instr.
+/// If Instr is already an S version, its own opcode is returned.
+/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
+/// version or we are not interested in it.
+static unsigned sForm(MachineInstr &Instr) {
+  switch (Instr.getOpcode()) {
+  default:
+    return AArch64::INSTRUCTION_LIST_END;
+
+  case AArch64::ADDSWrr:
+  case AArch64::ADDSWri:
+  case AArch64::ADDSXrr:
+  case AArch64::ADDSXri:
+  case AArch64::SUBSWrr:
+  case AArch64::SUBSWri:
+  case AArch64::SUBSXrr:
+  case AArch64::SUBSXri:
+    return Instr.getOpcode();
+
+  case AArch64::ADDWrr:
+    return AArch64::ADDSWrr;
+
+  case AArch64::ADDWri:
+    return AArch64::ADDSWri;
+
+  case AArch64::ADDXrr:
+    return AArch64::ADDSXrr;
+
+  case AArch64::ADDXri:
+    return AArch64::ADDSXri;
+
+  case AArch64::ADCWr:
+    return AArch64::ADCSWr;
+
+  case AArch64::ADCXr:
+    return AArch64::ADCSXr;
+
+  case AArch64::SUBWrr:
+    return AArch64::SUBSWrr;
+
+  case AArch64::SUBWri:
+    return AArch64::SUBSWri;
+
+  case AArch64::SUBXrr:
+    return AArch64::SUBSXrr;
+
+  case AArch64::SUBXri:
+    return AArch64::SUBSXri;
+
+  case AArch64::SBCWr:
+    return AArch64::SBCSWr;
+
+  case AArch64::SBCXr:
+    return AArch64::SBCSXr;
+
+  case AArch64::ANDWri:
+    return AArch64::ANDSWri;
+
+  case AArch64::ANDXri:
+    return AArch64::ANDSXri;
+  }
+}
+
+/// Check if Instr can produce a needed condition code.
+static bool canInstrProduceCondCode(MachineInstr &Instr,
+                                    AArch64CC::CondCode NeededCondCode) {
+  switch (NeededCondCode) {
+  default:
+    return false;
+
+  case AArch64CC::NE:
+    switch (Instr.getOpcode()) {
+    default:
+      break;
+
+    case AArch64::ADDWri:
+    case AArch64::ADDXri:
+    case AArch64::ADDSWri:
+    case AArch64::ADDSXri:
+      return Instr.getOperand(1).isFI()
+          && !Instr.getOperand(2).getImm();
+    }
+    break;
+  }
+  return false;
+}
+
+static bool isADDS(unsigned Opcode) {
+  return AArch64::ADDSWri <= Opcode && Opcode <= AArch64::ADDSXrx64;
+}
+
+static bool isSUBS(unsigned Opcode) {
+  return AArch64::SUBSWri <= Opcode && Opcode <= AArch64::SUBSXrx64;
+}
+
+/// Substitute CmpInstr with another instruction which produces a needed
+/// condition code.
+/// Return true on success.
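+///
+/// Illustrative example (registers are made up, not from this patch): if the
+/// only user of the flags needs NE, a sequence of the form
+///   sub w8, w0, w1; cmp w8, #0; b.ne <target>
+/// can be rewritten so that w8 is produced by subs and the explicit compare
+/// with zero is erased; b.ne then reuses the flags set by the subtraction.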
+bool AArch64InstrInfo::substituteCmpInstr(MachineInstr *CmpInstr,
+                                          unsigned SrcReg,
+                                          AArch64CC::CondCode NeededCondCode,
+                                          const MachineRegisterInfo *MRI) const {
+  assert(CmpInstr);
+  assert(MRI);
+
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+  if (!MI)
+    return false;
+
+  switch (NeededCondCode) {
+  default:
+    return false;
+
+  case AArch64CC::EQ:
+  case AArch64CC::NE:
+  case AArch64CC::MI:
+  case AArch64CC::PL:
+  {
+    unsigned MiSOpcode = sForm(*MI);
+    if ((isADDS(CmpInstr->getOpcode()) && isADDS(MiSOpcode))
+        || (isSUBS(CmpInstr->getOpcode()) && isSUBS(MiSOpcode))
+        || canInstrProduceCondCode(*MI, NeededCondCode)) {
+      MI->setDesc(get(MiSOpcode));
+      CmpInstr->eraseFromParent();
+      bool succeeded = UpdateOperandRegClass(MI);
+      (void)succeeded;
+      assert(succeeded && "Some operands reg class are incompatible!");
+      MI->addRegisterDefined(AArch64::NZCV, TRI);
+      return true;
+    }
+  }
   }
+  return false;
 }
 
-/// optimizeCompareInstr - Convert the instruction supplying the argument to the
-/// comparison into one that sets the zero bit in the flags register.
+
+/// Try to optimize a compare instruction. A compare instruction is an
+/// instruction which produces AArch64::NZCV. It is effectively a pure compare
+/// when there are no uses of its destination register.
+///
+/// The following steps are tried in order:
+/// 1. Convert CmpInstr into a non-flag-setting version if NZCV is not used.
+/// 2. Remove CmpInstr if there is an earlier instruction that already produces
+///    the needed condition code, or an instruction which can be converted into
+///    one that does.
+/// Only comparison with zero is supported.
 bool AArch64InstrInfo::optimizeCompareInstr(
     MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
     int CmpValue, const MachineRegisterInfo *MRI) const {
+  assert(CmpInstr);
+  assert(CmpInstr->getParent());
+  assert(MRI);
 
   // Replace SUBSWrr with SUBWrr if NZCV is not used.
-  int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
-  if (Cmp_NZCV != -1) {
+  int DeadNZCVIdx = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
+  if (DeadNZCVIdx != -1) {
     if (CmpInstr->definesRegister(AArch64::WZR) ||
         CmpInstr->definesRegister(AArch64::XZR)) {
       CmpInstr->eraseFromParent();
@@ -843,7 +1059,7 @@
       return false;
     const MCInstrDesc &MCID = get(NewOpc);
     CmpInstr->setDesc(MCID);
-    CmpInstr->RemoveOperand(Cmp_NZCV);
+    CmpInstr->RemoveOperand(DeadNZCVIdx);
     bool succeeded = UpdateOperandRegClass(CmpInstr);
     (void)succeeded;
     assert(succeeded && "Some operands reg class are incompatible!");
@@ -861,126 +1077,43 @@
   if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
     return false;
 
-  // Get the unique definition of SrcReg.
-  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
-  if (!MI)
-    return false;
-
-  bool CheckOnlyCCWrites = false;
   const TargetRegisterInfo *TRI = &getRegisterInfo();
-  if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
-    return false;
-  unsigned NewOpc = MI->getOpcode();
-  switch (MI->getOpcode()) {
-  default:
-    return false;
-  case AArch64::ADDSWrr:
-  case AArch64::ADDSWri:
-  case AArch64::ADDSXrr:
-  case AArch64::ADDSXri:
-  case AArch64::SUBSWrr:
-  case AArch64::SUBSWri:
-  case AArch64::SUBSXrr:
-  case AArch64::SUBSXri:
-    break;
-  case AArch64::ADDWrr:  NewOpc = AArch64::ADDSWrr; break;
-  case AArch64::ADDWri:  NewOpc = AArch64::ADDSWri; break;
-  case AArch64::ADDXrr:  NewOpc = AArch64::ADDSXrr; break;
-  case AArch64::ADDXri:  NewOpc = AArch64::ADDSXri; break;
-  case AArch64::ADCWr:   NewOpc = AArch64::ADCSWr; break;
-  case AArch64::ADCXr:   NewOpc = AArch64::ADCSXr; break;
-  case AArch64::SUBWrr:  NewOpc = AArch64::SUBSWrr; break;
-  case AArch64::SUBWri:  NewOpc = AArch64::SUBSWri; break;
-  case AArch64::SUBXrr:  NewOpc = AArch64::SUBSXrr; break;
-  case AArch64::SUBXri:  NewOpc = AArch64::SUBSXri; break;
-  case AArch64::SBCWr:   NewOpc = AArch64::SBCSWr; break;
-  case AArch64::SBCXr:   NewOpc = AArch64::SBCSXr; break;
-  case AArch64::ANDWri:  NewOpc = AArch64::ANDSWri; break;
-  case AArch64::ANDXri:  NewOpc = AArch64::ANDSXri; break;
-  }
-
-  // Scan forward for the use of NZCV.
-  // When checking against MI: if it's a conditional code requires
-  // checking of V bit, then this is not safe to do.
-  // It is safe to remove CmpInstr if NZCV is redefined or killed.
-  // If we are done with the basic block, we need to check whether NZCV is
-  // live-out.
-  bool IsSafe = false;
-  for (MachineBasicBlock::iterator I = CmpInstr,
-                                   E = CmpInstr->getParent()->end();
-       !IsSafe && ++I != E;) {
-    const MachineInstr &Instr = *I;
-    for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
-         ++IO) {
-      const MachineOperand &MO = Instr.getOperand(IO);
-      if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
-        IsSafe = true;
-        break;
-      }
-      if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
-        continue;
-      if (MO.isDef()) {
-        IsSafe = true;
-        break;
-      }
+  AArch64CC::CondCode NeededCondCode = AArch64CC::Invalid;
+  MachineBasicBlock::iterator CFlagsFirstModificationLoc
+      = firstModificationOfCondFlagsAfterInstr(CmpInstr, TRI);
+  for (MachineBasicBlock::iterator I = CmpInstr;
+       ++I != CFlagsFirstModificationLoc;) {
+    MachineInstr &MI = *I;
+    if (!MI.readsRegister(AArch64::NZCV, TRI))
+      continue;
 
-      // Decode the condition code.
-      unsigned Opc = Instr.getOpcode();
-      AArch64CC::CondCode CC;
-      switch (Opc) {
-      default:
-        return false;
-      case AArch64::Bcc:
-        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
-        break;
-      case AArch64::CSINVWr:
-      case AArch64::CSINVXr:
-      case AArch64::CSINCWr:
-      case AArch64::CSINCXr:
-      case AArch64::CSELWr:
-      case AArch64::CSELXr:
-      case AArch64::CSNEGWr:
-      case AArch64::CSNEGXr:
-      case AArch64::FCSELSrrr:
-      case AArch64::FCSELDrrr:
-        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
-        break;
-      }
+    AArch64CC::CondCode CC = findCondCodeUsedByInstr(MI);
+    if (CC == AArch64CC::Invalid)
+      return false;
 
-      // It is not safe to remove Compare instruction if Overflow(V) is used.
-      switch (CC) {
-      default:
-        // NZCV can be used multiple times, we should continue.
-        break;
-      case AArch64CC::VS:
-      case AArch64CC::VC:
-      case AArch64CC::GE:
-      case AArch64CC::LT:
-      case AArch64CC::GT:
-      case AArch64CC::LE:
-        return false;
-      }
+    if (NeededCondCode == AArch64CC::Invalid)
+      NeededCondCode = CC;
+    else if (CC != NeededCondCode) {
+      // Different condition codes are used; it might not be safe to remove
+      // CmpInstr.
+      return false;
     }
   }
 
-  // If NZCV is not killed nor re-defined, we should check whether it is
-  // live-out. If it is live-out, do not optimize.
-  if (!IsSafe) {
-    MachineBasicBlock *ParentBlock = CmpInstr->getParent();
-    for (auto *MBB : ParentBlock->successors())
-      if (MBB->isLiveIn(AArch64::NZCV))
-        return false;
+  // There are instructions which read and write the flags at the same time;
+  // check the first flag-modifying instruction for such a read.
+  if (NeededCondCode == AArch64CC::Invalid
+      && CFlagsFirstModificationLoc != CmpInstr->getParent()->end()) {
+    NeededCondCode = findCondCodeUsedByInstr(*CFlagsFirstModificationLoc);
+    if (NeededCondCode == AArch64CC::Invalid)
+      return false;
   }
 
-  // Update the instruction to set NZCV.
-  MI->setDesc(get(NewOpc));
-  CmpInstr->eraseFromParent();
-  bool succeeded = UpdateOperandRegClass(MI);
-  (void)succeeded;
-  assert(succeeded && "Some operands reg class are incompatible!");
-  MI->addRegisterDefined(AArch64::NZCV, TRI);
-  return true;
+  // No flag user was found in the parent block of CmpInstr, or the flags are
+  // live into a successor block.
+  if (NeededCondCode == AArch64CC::Invalid
+      || areCFlagsAliveInSuccessors(CmpInstr->getParent()))
+    return false;
+
+  return substituteCmpInstr(CmpInstr, SrcReg, NeededCondCode, MRI);
 }
 
 bool
@@ -3066,6 +3199,95 @@
   return;
 }
 
+static bool isCmpWithZero(MachineInstr *CmpInstr) {
+  assert(CmpInstr);
+  return (CmpInstr->getOpcode() == AArch64::SUBSWri
+          || CmpInstr->getOpcode() == AArch64::SUBSXri)
+      && !CmpInstr->getOperand(2).getImm();
+}
+
+/// Optimize Cmp and Brc sequences.
+/// When we compare with zero, the outcome of some condition codes is known up
+/// front. We can then figure out which path Brc takes and substitute it with
+/// an unconditional branch to that path.
+///
+/// Rules:
+/// SUBS reg, 0; B.LO => B
+/// SUBS reg, 0; B.HS