Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -929,91 +929,180 @@
   return false;
 }
 
-/// Substitute CmpInstr with another instruction which produces a needed
-/// condition code.
+struct UsedNZCV {
+  bool N;
+  bool Z;
+  bool C;
+  bool V;
+  UsedNZCV() : N(false), Z(false), C(false), V(false) {}
+  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
+    this->N |= UsedFlags.N;
+    this->Z |= UsedFlags.Z;
+    this->C |= UsedFlags.C;
+    this->V |= UsedFlags.V;
+    return *this;
+  }
+};
+
+/// Find a condition code used by the instruction.
+/// Returns AArch64CC::Invalid if either the instruction does not use condition
+/// codes or we don't optimize CmpInstr in the presence of such instructions.
+static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
+  switch (Instr.getOpcode()) {
+  default:
+    return AArch64CC::Invalid;
+
+  case AArch64::Bcc: {
+    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+    assert(Idx >= 2);
+    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
+  }
+
+  case AArch64::CSINVWr:
+  case AArch64::CSINVXr:
+  case AArch64::CSINCWr:
+  case AArch64::CSINCXr:
+  case AArch64::CSELWr:
+  case AArch64::CSELXr:
+  case AArch64::CSNEGWr:
+  case AArch64::CSNEGXr:
+  case AArch64::FCSELSrrr:
+  case AArch64::FCSELDrrr: {
+    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+    assert(Idx >= 1);
+    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
+  }
+  }
+}
+
+static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
+  assert(CC != AArch64CC::Invalid);
+  UsedNZCV NZCV;
+  switch (CC) {
+  default:
+    break;
+
+  case AArch64CC::EQ:
+  case AArch64CC::NE:
+    NZCV.Z = true;
+    break;
+
+  case AArch64CC::HI:
+  case AArch64CC::LS:
+    NZCV.Z = true; // Fall through: HI/LS also read C.
+  case AArch64CC::HS:
+  case AArch64CC::LO:
+    NZCV.C = true;
+    break;
+
+  case AArch64CC::MI:
+  case AArch64CC::PL:
+    NZCV.N = true;
+    break;
+
+  case AArch64CC::VS:
+  case AArch64CC::VC:
+    NZCV.V = true;
+    break;
+
+  case AArch64CC::GT:
+  case AArch64CC::LE:
+    NZCV.Z = true; // Fall through: GT/LE also read N and V.
+  case AArch64CC::GE:
+  case AArch64CC::LT:
+    NZCV.N = true;
+    NZCV.V = true;
+    break;
+  }
+  return NZCV;
+}
+
+static bool isADDSRegImm(unsigned Opcode) {
+  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
+}
+
+static bool isSUBSRegImm(unsigned Opcode) {
+  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
+}
+
+// Check if CmpInstr can be substituted by MI.
+//
+// CmpInstr can be substituted:
+// - CmpInstr is either ADDS or SUBS (the register-immediate forms)
+// - and, MI and CmpInstr are from the same MachineBB
+// - and, condition flags are not alive in successors of the CmpInstr parent
+// - and, no writes to NZCV between MI and CmpInstr
+// - and, C/V flags are not used between MI and CmpInstr
+// - and, C/V flags are not used after CmpInstr
+static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
+                                       const TargetRegisterInfo *TRI) {
+  assert(MI);
+  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
+  assert(CmpInstr);
+
+  const unsigned CmpOpcode = CmpInstr->getOpcode();
+  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
+    return false;
+
+  if (MI->getParent() != CmpInstr->getParent())
+    return false;
+
+  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
+    return false;
+
+  UsedNZCV NZCVUsedBetweenMIAndCmp;
+  for (auto I = std::next(MI->getIterator()), E = CmpInstr->getIterator();
+       I != E; ++I) {
+    const MachineInstr &Instr = *I;
+    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
+      return false;
+    if (!Instr.readsRegister(AArch64::NZCV, TRI))
+      continue;
+
+    AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
+    if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
+      return false;
+
+    NZCVUsedBetweenMIAndCmp |= getUsedNZCV(CC);
+  }
+  UsedNZCV NZCVUsedAfterCmp;
+  for (auto I = std::next(CmpInstr->getIterator()), E = CmpInstr->getParent()->instr_end();
+       I != E; ++I) {
+    const MachineInstr &Instr = *I;
+    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
+      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
+      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
+        return false;
+      NZCVUsedAfterCmp |= getUsedNZCV(CC);
+    }
+
+    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
+      break;
+  }
+
+  return !NZCVUsedBetweenMIAndCmp.C && !NZCVUsedBetweenMIAndCmp.V
+      && !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
+}
+
+/// Substitute CmpInstr with another instruction which produces the needed
+/// condition flags.
 /// Return true on success.
 bool AArch64InstrInfo::substituteCmpInstr(MachineInstr *CmpInstr,
                                           unsigned SrcReg,
                                           const MachineRegisterInfo *MRI) const {
+  assert(CmpInstr);
+  assert(MRI);
   // Get the unique definition of SrcReg.
   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
   if (!MI)
     return false;
 
   const TargetRegisterInfo *TRI = &getRegisterInfo();
-  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI))
-    return false;
 
   unsigned NewOpc = sForm(*MI);
   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
     return false;
 
-  // Scan forward for the use of NZCV.
-  // When checking against MI: if it's a conditional code requires
-  // checking of V bit, then this is not safe to do.
-  // It is safe to remove CmpInstr if NZCV is redefined or killed.
-  // If we are done with the basic block, we need to check whether NZCV is
-  // live-out.
-  bool IsSafe = false;
-  for (MachineBasicBlock::iterator I = CmpInstr,
-                                   E = CmpInstr->getParent()->end();
-       !IsSafe && ++I != E;) {
-    const MachineInstr &Instr = *I;
-    for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
-         ++IO) {
-      const MachineOperand &MO = Instr.getOperand(IO);
-      if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
-        IsSafe = true;
-        break;
-      }
-      if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
-        continue;
-      if (MO.isDef()) {
-        IsSafe = true;
-        break;
-      }
-
-      // Decode the condition code.
-      unsigned Opc = Instr.getOpcode();
-      AArch64CC::CondCode CC;
-      switch (Opc) {
-      default:
-        return false;
-      case AArch64::Bcc:
-        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
-        break;
-      case AArch64::CSINVWr:
-      case AArch64::CSINVXr:
-      case AArch64::CSINCWr:
-      case AArch64::CSINCXr:
-      case AArch64::CSELWr:
-      case AArch64::CSELXr:
-      case AArch64::CSNEGWr:
-      case AArch64::CSNEGXr:
-      case AArch64::FCSELSrrr:
-      case AArch64::FCSELDrrr:
-        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
-        break;
-      }
-
-      // It is not safe to remove Compare instruction if Overflow(V) is used.
-      switch (CC) {
-      default:
-        // NZCV can be used multiple times, we should continue.
-        break;
-      case AArch64CC::VS:
-      case AArch64CC::VC:
-      case AArch64CC::GE:
-      case AArch64CC::LT:
-      case AArch64CC::GT:
-      case AArch64CC::LE:
-        return false;
-      }
-    }
-  }
-
-  // If NZCV is not killed nor re-defined, we should check whether it is
-  // live-out. If it is live-out, do not optimize.
-  if (!IsSafe && areCFlagsAliveInSuccessors(CmpInstr->getParent()))
+  if (!canInstrSubstituteCmpInstr(MI, CmpInstr, TRI))
     return false;
 
   // Update the instruction to set NZCV.
Index: test/CodeGen/AArch64/arm64-regress-opt-cmp.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-regress-opt-cmp.mir
@@ -0,0 +1,113 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass peephole-opt %s 2>&1 | FileCheck %s
+# CHECK: %8 = LSLVWr {{.*}}
+# CHECK-NEXT: %9 = ANDWri {{.*}}
+# CHECK-NEXT: %10 = SUBSWri {{.*}}
+--- |
+  ; ModuleID = 'arm64-regress-opt-cmp.ll'
+  source_filename = "arm64-regress-opt-cmp.ll"
+  target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--linux-gnu"
+
+  @d = internal global [4 x i8] c"\01\00\00\00", align 1
+  @c = internal global i8 2, align 1
+
+  declare void @a(i32)
+
+  ; Function Attrs: nounwind
+  define i32 @test01() #0 {
+  entry:
+    %0 = load i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @d, i64 0, i64 3), align 1
+    store i8 %0, i8* @c, align 1
+    %1 = load i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @d, i64 0, i64 1), align 1
+    %conv = zext i8 %1 to i32
+    %2 = load i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @d, i64 0, i64 2), align 1
+    %conv1 = zext i8 %2 to i32
+    %shl = shl i32 %conv, %conv1
+    %conv3 = and i32 %shl, 65535
+    %cmp = icmp ult i32 %conv3, zext (i1 icmp eq (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @d, i64 0, i64 3), i8* @c) to i32)
+    br i1 %cmp, label %if.end, label %if.then
+
+  if.then:                                          ; preds = %entry
+    call void @a(i32 0)
+    br label %if.end
+
+  if.end:                                           ; preds = %if.then, %entry
+    ret i32 0
+  }
+
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #0
+
+  attributes #0 = { nounwind }
+
+...
+---
+name: test01
+alignment: 2
+exposesReturnsTwice: false
+hasInlineAsm: false
+allVRegsAllocated: false
+isSSA: true
+tracksRegLiveness: true
+tracksSubRegLiveness: false
+registers:
+  - { id: 0, class: gpr64common }
+  - { id: 1, class: gpr64common }
+  - { id: 2, class: gpr32 }
+  - { id: 3, class: gpr64common }
+  - { id: 4, class: gpr32 }
+  - { id: 5, class: gpr32 }
+  - { id: 6, class: gpr64all }
+  - { id: 7, class: gpr32 }
+  - { id: 8, class: gpr32 }
+  - { id: 9, class: gpr32common }
+  - { id: 10, class: gpr32 }
+  - { id: 11, class: gpr32all }
+  - { id: 12, class: gpr32all }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 0
+  adjustsStack: false
+  hasCalls: true
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+body: |
+  bb.0.entry:
+    successors: %bb.2.if.end, %bb.1.if.then
+
+    %0 = MOVaddr target-flags(aarch64-page) @d, target-flags(aarch64-pageoff, aarch64-nc) @d
+    early-clobber %1, %2 = LDRBBpre %0, 3
+    %3 = MOVaddr target-flags(aarch64-page) @c, target-flags(aarch64-pageoff, aarch64-nc) @c
+    STRBBui killed %2, %3, 0 :: (store 1 into @c)
+    %4 = LDURBBi %1, -2 :: (load 1 from `i8* getelementptr inbounds ([4 x i8], [4 x i8]* @d, i64 0, i64 1)`)
+    %5 = LDURBBi %1, -1 :: (load 1 from `i8* getelementptr inbounds ([4 x i8], [4 x i8]* @d, i64 0, i64 2)`)
+    %6 = SUBREG_TO_REG 0, killed %5, 15
+    %7 = COPY %6:sub_32
+    %8 = LSLVWr killed %4, killed %7
+    %9 = ANDWri killed %8, 15
+    %10 = SUBSWri killed %9, 0, 0, implicit-def %nzcv
+    Bcc 3, %bb.2.if.end, implicit %nzcv
+    B %bb.1.if.then
+
+  bb.1.if.then:
+    successors: %bb.2.if.end
+
+    ADJCALLSTACKDOWN 0, implicit-def dead %sp, implicit %sp
+    %11 = COPY %wzr
+    %w0 = COPY %11
+    BL @a, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit %w0, implicit-def %sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
+
+  bb.2.if.end:
+    %12 = COPY %wzr
+    %w0 = COPY %12
+    RET_ReallyLR implicit %w0
+
+...
Index: test/CodeGen/AArch64/subs-to-sub-opt.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/subs-to-sub-opt.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -O3 -o - %s | FileCheck %s
+
+@a = external global i8, align 1
+@b = external global i8, align 1
+
+; Test that SUBS is replaced by SUB if condition flags are not used.
+define i32 @test01() nounwind {
+; CHECK: ldrb {{.*}}
+; CHECK-NEXT: ldrb {{.*}}
+; CHECK-NEXT: sub {{.*}}
+; CHECK-NEXT: cmn {{.*}}
+entry:
+  %0 = load i8, i8* @a, align 1
+  %conv = zext i8 %0 to i32
+  %1 = load i8, i8* @b, align 1
+  %conv1 = zext i8 %1 to i32
+  %s = sub nsw i32 %conv1, %conv
+  %cmp0 = icmp eq i32 %s, -1
+  %cmp1 = sext i1 %cmp0 to i8
+  store i8 %cmp1, i8* @a
+  ret i32 0
+}
+
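Not part of the patch: for readers who want to experiment with the flag-usage table that getUsedNZCV encodes, below is a minimal standalone C++ sketch of the same mapping. The CondCode enum, the local UsedNZCV struct and the main() driver are stand-ins invented for illustration, not LLVM's types; only the per-condition-code N/Z/C/V usage mirrors the patch.

#include <cstdio>

// Local stand-ins for AArch64CC::CondCode and the patch's UsedNZCV struct.
enum CondCode { EQ, NE, HS, LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE };

struct UsedNZCV {
  bool N = false, Z = false, C = false, V = false;
};

// Which NZCV flags does a conditional instruction with condition code CC read?
// Same table as getUsedNZCV in the patch, written without fall-throughs.
static UsedNZCV getUsedNZCV(CondCode CC) {
  UsedNZCV U;
  switch (CC) {
  case EQ: case NE: U.Z = true; break;             // Z only
  case HS: case LO: U.C = true; break;             // C only
  case MI: case PL: U.N = true; break;             // N only
  case VS: case VC: U.V = true; break;             // V only
  case HI: case LS: U.Z = U.C = true; break;       // Z and C
  case GE: case LT: U.N = U.V = true; break;       // N and V
  case GT: case LE: U.Z = U.N = U.V = true; break; // Z, N and V
  }
  return U;
}

int main() {
  const char *Names[] = {"EQ", "NE", "HS", "LO", "MI", "PL", "VS",
                         "VC", "HI", "LS", "GE", "LT", "GT", "LE"};
  for (int CC = EQ; CC <= LE; ++CC) {
    UsedNZCV U = getUsedNZCV(static_cast<CondCode>(CC));
    // canInstrSubstituteCmpInstr allows the rewrite only when no reader of
    // the flags needs C or V, so report that verdict per condition code.
    std::printf("%-2s reads %s%s%s%s -> %s\n", Names[CC],
                U.N ? "N" : "", U.Z ? "Z" : "", U.C ? "C" : "", U.V ? "V" : "",
                (!U.C && !U.V) ? "C/V unused" : "C/V used");
  }
  return 0;
}

Running it lists, for every condition code, the flags it reads and whether a user restricted to those flags would still permit the ADDS/SUBS-to-ADD/SUB rewrite (i.e. it reads neither C nor V).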