Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -174,6 +174,9 @@
   bool tryOptVectorDup(MachineInstr &MI) const;
   bool tryOptSelect(MachineInstr &MI) const;
 
+  // Return true if MI can be represented as a CMN.
+  bool tryFoldCMN(MachineInstr &MI, MachineRegisterInfo &MRI) const;
+
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
   const AArch64InstrInfo &TII;
@@ -1847,12 +1850,17 @@
     unsigned CmpOpc = 0;
     unsigned ZReg = 0;
 
+    // Check if this compare can be represented as a cmn, and perform any
+    // necessary transformations to do so.
+    bool GotCMN = tryFoldCMN(I, MRI);
+
     LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
     if (CmpTy == LLT::scalar(32)) {
-      CmpOpc = AArch64::SUBSWrr;
+      // ADDS is aliased to CMN. Use that for consistency with SUBS.
+      CmpOpc = GotCMN ? AArch64::ADDSWrr : AArch64::SUBSWrr;
       ZReg = AArch64::WZR;
     } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
-      CmpOpc = AArch64::SUBSXrr;
+      CmpOpc = GotCMN ? AArch64::ADDSXrr : AArch64::SUBSXrr;
       ZReg = AArch64::XZR;
     } else {
       return false;
@@ -1860,8 +1868,24 @@
 
     // Try to match immediate forms.
     auto ImmFns = selectArithImmed(I.getOperand(3));
-    if (ImmFns)
-      CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
+    if (ImmFns) {
+      switch (CmpOpc) {
+      case AArch64::SUBSWrr:
+        CmpOpc = AArch64::SUBSWri;
+        break;
+      case AArch64::SUBSXrr:
+        CmpOpc = AArch64::SUBSXri;
+        break;
+      case AArch64::ADDSWrr:
+        CmpOpc = AArch64::ADDSWri;
+        break;
+      case AArch64::ADDSXrr:
+        CmpOpc = AArch64::ADDSXri;
+        break;
+      default:
+        llvm_unreachable("Unknown opcode?!");
+      }
+    }
 
     // CSINC increments the result by one when the condition code is false.
     // Therefore, we have to invert the predicate to get an increment by 1 when
@@ -2938,6 +2962,90 @@
   return true;
 }
 
+bool AArch64InstructionSelector::tryFoldCMN(MachineInstr &I,
+                                            MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_ICMP && "Expected G_ICMP");
+  // We want to find this sort of thing:
+  // x = G_SUB 0, y
+  // G_ICMP z, x
+  //
+  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
+  // e.g.:
+  //
+  // cmn z, y
+
+  // Helper lambda to find the def.
+  auto FindDef = [&](unsigned VReg) {
+    MachineInstr *Def = MRI.getVRegDef(VReg);
+    while (Def) {
+      if (Def->getOpcode() != TargetOpcode::COPY)
+        break;
+      // Copies can be from physical registers. If we hit this, we're done.
+      if (TargetRegisterInfo::isPhysicalRegister(Def->getOperand(1).getReg()))
+        break;
+      Def = MRI.getVRegDef(Def->getOperand(1).getReg());
+    }
+    return Def;
+  };
+
+  // Helper lambda to detect the subtract followed by the compare.
+  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
+  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
+    // We want to match against G_SUBs.
+    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
+      return false;
+
+    // Need to make sure NZCV is the same at the end of the transformation.
+    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
+      return false;
+
+    // Make sure that we're getting
+    // x = G_SUB 0, y
+    auto ValAndVReg =
+        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
+    if (!ValAndVReg || ValAndVReg->Value != 0)
+      return false;
+
+    // This can safely be represented as a CMN.
+    return true;
+  };
+
+  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
+  MachineInstr *LHSDef = FindDef(I.getOperand(2).getReg());
+  MachineInstr *RHSDef = FindDef(I.getOperand(3).getReg());
+  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
+      (CmpInst::Predicate)I.getOperand(1).getPredicate());
+  if (IsCMN(LHSDef, CC)) {
+    // We're doing this:
+    //
+    // Given:
+    //
+    // x = G_SUB 0, y
+    // G_ICMP x, z
+    //
+    // Update the G_ICMP:
+    //
+    // G_ICMP y, z
+    I.getOperand(2).setReg(LHSDef->getOperand(2).getReg());
+    return true;
+  } else if (IsCMN(RHSDef, CC)) {
+    // Same idea here, but with the RHS of the compare instead:
+    //
+    // Given:
+    //
+    // x = G_SUB 0, y
+    // G_ICMP z, x
+    //
+    // Update the G_ICMP:
+    //
+    // G_ICMP z, y
+    I.getOperand(3).setReg(RHSDef->getOperand(2).getReg());
+    return true;
+  }
+
+  return false;
+}
+
 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
   // Try to match a vector splat operation into a dup instruction.
   // We're looking for this pattern:
Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-cmn.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-cmn.mir
@@ -0,0 +1,291 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify that we can fold G_SUB into G_ICMP when we have a pattern like this:
+#
+# x = G_SUB 0, y
+# G_ICMP intpred(something_safe) z, x
+#
+# Where "something_safe" is ne or eq.
+#
+# Tests whose names start with cmn_ should use ADDS for the G_ICMP. Tests whose
+# names start with no_cmn should use SUBS.
+#
+
+...
+---
+name: cmn_s32_rhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: cmn_s32_rhs
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
+    ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK: $wzr = ADDSWrr [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; CHECK: $wzr = ANDSWri [[CSINCWr]], 0, implicit-def $nzcv
+    ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 1, implicit $nzcv
+    ; CHECK: $w0 = COPY [[CSELWr]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_CONSTANT i32 0
+    %6:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32) = G_SUB %2, %1
+    %7:gpr(s32) = G_ICMP intpred(ne), %0(s32), %3
+    %4:gpr(s1) = G_TRUNC %7(s32)
+    %5:gpr(s32) = G_SELECT %4(s1), %6, %2
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: cmn_s32_lhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: cmn_s32_lhs
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
+    ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK: $wzr = ADDSWrr [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; CHECK: $wzr = ANDSWri [[CSINCWr]], 0, implicit-def $nzcv
+    ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 1, implicit $nzcv
+    ; CHECK: $w0 = COPY [[CSELWr]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_CONSTANT i32 0
+    %6:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32) = G_SUB %2, %0
+    %7:gpr(s32) = G_ICMP intpred(ne), %3(s32), %1
+    %4:gpr(s1) = G_TRUNC %7(s32)
+    %5:gpr(s32) = G_SELECT %4(s1), %6, %2
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: no_cmn_s32_rhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: no_cmn_s32_rhs
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
+    ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[MOVi32imm]], [[COPY1]], implicit-def $nzcv
+    ; CHECK: $wzr = SUBSWrr [[COPY]], [[SUBSWrr]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; CHECK: $wzr = ANDSWri [[CSINCWr]], 0, implicit-def $nzcv
+    ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 1, implicit $nzcv
+    ; CHECK: $w0 = COPY [[CSELWr]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_CONSTANT i32 0
+    %6:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32) = G_SUB %2, %1
+    %7:gpr(s32) = G_ICMP intpred(slt), %0(s32), %3
+    %4:gpr(s1) = G_TRUNC %7(s32)
+    %5:gpr(s32) = G_SELECT %4(s1), %6, %2
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: no_cmn_s32_lhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: no_cmn_s32_lhs
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
+    ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[MOVi32imm]], [[COPY]], implicit-def $nzcv
+    ; CHECK: $wzr = SUBSWrr [[SUBSWrr]], [[COPY1]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; CHECK: $wzr = ANDSWri [[CSINCWr]], 0, implicit-def $nzcv
+    ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVi32imm1]], [[MOVi32imm]], 1, implicit $nzcv
+    ; CHECK: $w0 = COPY [[CSELWr]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_CONSTANT i32 0
+    %6:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32) = G_SUB %2, %0
+    %7:gpr(s32) = G_ICMP intpred(slt), %3(s32), %1
+    %4:gpr(s1) = G_TRUNC %7(s32)
+    %5:gpr(s32) = G_SELECT %4(s1), %6, %2
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: cmn_s64_rhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: cmn_s64_rhs
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 0
+    ; CHECK: [[MOVi64imm1:%[0-9]+]]:gpr64 = MOVi64imm 1
+    ; CHECK: $xzr = ADDSXrr [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; CHECK: $wzr = ANDSWri [[CSINCWr]], 0, implicit-def $nzcv
+    ; CHECK: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[MOVi64imm1]], [[MOVi64imm]], 1, implicit $nzcv
+    ; CHECK: $x0 = COPY [[CSELXr]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = G_CONSTANT i64 0
+    %6:gpr(s64) = G_CONSTANT i64 1
+    %3:gpr(s64) = G_SUB %2, %1
+    %7:gpr(s32) = G_ICMP intpred(ne), %0(s64), %3
+    %4:gpr(s1) = G_TRUNC %7(s32)
+    %5:gpr(s64) = G_SELECT %4(s1), %6, %2
+    $x0 = COPY %5(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: cmn_s64_lhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: cmn_s64_lhs
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 0
+    ; CHECK: [[MOVi64imm1:%[0-9]+]]:gpr64 = MOVi64imm 1
+    ; CHECK: $xzr = ADDSXrr [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; CHECK: $wzr = ANDSWri [[CSINCWr]], 0, implicit-def $nzcv
+    ; CHECK: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[MOVi64imm1]], [[MOVi64imm]], 1, implicit $nzcv
+    ; CHECK: $x0 = COPY [[CSELXr]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = G_CONSTANT i64 0
+    %6:gpr(s64) = G_CONSTANT i64 1
+    %3:gpr(s64) = G_SUB %2, %0
+    %7:gpr(s32) = G_ICMP intpred(ne), %3(s64), %1
+    %4:gpr(s1) = G_TRUNC %7(s32)
+    %5:gpr(s64) = G_SELECT %4(s1), %6, %2
+    $x0 = COPY %5(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: no_cmn_s64_rhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: no_cmn_s64_rhs
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 0
+    ; CHECK: [[MOVi64imm1:%[0-9]+]]:gpr64 = MOVi64imm 1
+    ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[MOVi64imm]], [[COPY1]], implicit-def $nzcv
+    ; CHECK: $xzr = SUBSXrr [[COPY]], [[SUBSXrr]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; CHECK: $wzr = ANDSWri [[CSINCWr]], 0, implicit-def $nzcv
+    ; CHECK: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[MOVi64imm1]], [[MOVi64imm]], 1, implicit $nzcv
+    ; CHECK: $x0 = COPY [[CSELXr]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = G_CONSTANT i64 0
+    %6:gpr(s64) = G_CONSTANT i64 1
+    %3:gpr(s64) = G_SUB %2, %1
+    %7:gpr(s32) = G_ICMP intpred(slt), %0(s64), %3
+    %4:gpr(s1) = G_TRUNC %7(s32)
+    %5:gpr(s64) = G_SELECT %4(s1), %6, %2
+    $x0 = COPY %5(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: no_cmn_s64_lhs
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: no_cmn_s64_lhs
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 0
+    ; CHECK: [[MOVi64imm1:%[0-9]+]]:gpr64 = MOVi64imm 1
+    ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[MOVi64imm]], [[COPY]], implicit-def $nzcv
+    ; CHECK: $xzr = SUBSXrr [[SUBSXrr]], [[COPY1]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; CHECK: $wzr = ANDSWri [[CSINCWr]], 0, implicit-def $nzcv
+    ; CHECK: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[MOVi64imm1]], [[MOVi64imm]], 1, implicit $nzcv
+    ; CHECK: $x0 = COPY [[CSELXr]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = G_CONSTANT i64 0
+    %6:gpr(s64) = G_CONSTANT i64 1
+    %3:gpr(s64) = G_SUB %2, %0
+    %7:gpr(s32) = G_ICMP intpred(slt), %3(s64), %1
+    %4:gpr(s1) = G_TRUNC %7(s32)
+    %5:gpr(s64) = G_SELECT %4(s1), %6, %2
+    $x0 = COPY %5(s64)
+    RET_ReallyLR implicit $x0
+
+...