Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -378,8 +378,23 @@
   // Optimization methods.
   bool tryOptSelect(MachineInstr &MI) const;
+
+  /// Helper function for comparison selection.
+  /// \returns true if \p MI, as an operand of a comparison with condition code
+  /// \p CC, is a CMN.
+  bool isCMN(MachineInstr *MI, const AArch64CC::CondCode &CC,
+             const MachineRegisterInfo &MRI) const;
+
+  /// \returns the MachineOperands and CmpInst::Predicate which should be used
+  /// to emit a compare. If swapping \p LHS and \p RHS would introduce
+  /// profitable folding opportunities, swap them and return an updated
+  /// predicate.
+  std::tuple<MachineOperand &, MachineOperand &, CmpInst::Predicate>
+  trySwapCmpLHSAndRHS(MachineOperand &LHS, MachineOperand &RHS,
+                      CmpInst::Predicate P,
+                      const MachineRegisterInfo &MRI) const;
   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
-                                      MachineOperand &Predicate,
+                                      CmpInst::Predicate P,
                                       MachineIRBuilder &MIRBuilder) const;
   MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
                                                MachineOperand &RHS,
@@ -441,6 +456,14 @@
 {
 }
 
+/// \returns true if \p C is a legal arithmetic immediate.
+static bool isLegalArithImmed(uint64_t C) {
+  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
+  LLVM_DEBUG(dbgs() << "Is imm " << C
+                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
+  return IsLegal;
+}
+
 // FIXME: This should be target-independent, inferred from the types declared
 // for each class in the bank.
 static const TargetRegisterClass *
@@ -3876,6 +3899,93 @@
   return emitInstr(OpcTable[2][Is32Bit], {ZReg}, {LHS, RHS}, MIRBuilder);
 }
 
+/// \returns how profitable it is to fold a comparison's operand's shift and/or
+/// extension operations. This refers to how many instructions may be folded
+/// if \p Reg is made the RHS of a compare.
+static unsigned getCmpOperandFoldingProfit(Register Reg,
+                                           const MachineRegisterInfo &MRI) {
+  if (!MRI.hasOneNonDBGUse(Reg))
+    return 0;
+
+  auto IsSupportedExtend = [&](const MachineInstr &MI) {
+    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
+      return true;
+    if (MI.getOpcode() != TargetOpcode::G_AND)
+      return false;
+    auto ValAndVReg =
+        getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+    if (!ValAndVReg)
+      return false;
+    uint64_t Mask = ValAndVReg->Value;
+    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
+  };
+
+  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+  if (IsSupportedExtend(*Def))
+    return 1;
+
+  unsigned Opc = Def->getOpcode();
+  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
+      Opc != TargetOpcode::G_LSHR)
+    return 0;
+
+  auto ValAndVReg =
+      getConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
+  if (!ValAndVReg)
+    return 0;
+  uint64_t Shift = ValAndVReg->Value;
+  MachineInstr *ShiftLHS =
+      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
+  if (IsSupportedExtend(*ShiftLHS))
+    return (Shift <= 4) ? 2 : 1;
+  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
+  if (Ty.isVector())
+    return 0;
+  unsigned ShiftSize = Ty.getSizeInBits();
+  if ((ShiftSize == 32 && Shift <= 31) || (ShiftSize == 64 && Shift <= 63))
+    return 1;
+  return 0;
+}
+
+std::tuple<MachineOperand &, MachineOperand &, CmpInst::Predicate>
+AArch64InstructionSelector::trySwapCmpLHSAndRHS(
+    MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate P,
+    const MachineRegisterInfo &MRI) const {
+  // Swap the operands if it would introduce a profitable folding opportunity.
+  // (e.g. a shift + extend).
+  //
+  // For example:
+  //   lsl w13, w11, #1
+  //   cmp w13, w12
+  // can be turned into:
+  //   cmp w12, w11, lsl #1
+
+  // Don't swap if there's a constant on the RHS, because we know we can fold
+  // that.
+  Register RHSReg = RHS.getReg();
+  auto RHSCst = getConstantVRegValWithLookThrough(RHSReg, MRI);
+  if (RHSCst && isLegalArithImmed(RHSCst->Value))
+    return {LHS, RHS, P};
+  auto CC = changeICMPPredToAArch64CC(P);
+  auto GetRegForProfit = [&](Register Reg) {
+    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+    return isCMN(Def, CC, MRI) ? Def->getOperand(2).getReg() : Reg;
+  };
+
+  // Don't have a constant on the RHS. Would swapping the LHS and RHS introduce
+  // an opportunity to fold a constant?
+  Register TheLHS = GetRegForProfit(LHS.getReg());
+  Register TheRHS = GetRegForProfit(RHS.getReg());
+
+  // If the LHS is more likely to give us a folding opportunity, then swap the
+  // LHS and RHS.
+  if (getCmpOperandFoldingProfit(TheLHS, MRI) >
+      getCmpOperandFoldingProfit(TheRHS, MRI))
+    return {RHS, LHS, CmpInst::getSwappedPredicate(P)};
+
+  return {LHS, RHS, P};
+}
+
 std::pair<MachineInstr *, CmpInst::Predicate>
 AArch64InstructionSelector::emitIntegerCompare(
     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
@@ -3884,11 +3994,15 @@
   assert(Predicate.isPredicate() && "Expected predicate?");
   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
+  auto MaybeSwappedCmp = trySwapCmpLHSAndRHS(
+      LHS, RHS, static_cast<CmpInst::Predicate>(Predicate.getPredicate()), MRI);
+  MachineOperand &CmpLHS = std::get<0>(MaybeSwappedCmp);
+  MachineOperand &CmpRHS = std::get<1>(MaybeSwappedCmp);
+  CmpInst::Predicate P = std::get<2>(MaybeSwappedCmp);
 
   // Fold the compare if possible.
   MachineInstr *FoldCmp =
-      tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
+      tryFoldIntegerCompare(CmpLHS, CmpRHS, P, MIRBuilder);
   if (FoldCmp)
     return {FoldCmp, P};
 
@@ -3896,7 +4010,7 @@
   unsigned CmpOpc = 0;
   Register ZReg;
 
-  LLT CmpTy = MRI.getType(LHS.getReg());
+  LLT CmpTy = MRI.getType(CmpLHS.getReg());
   assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
          "Expected scalar or pointer");
   if (CmpTy == LLT::scalar(32)) {
@@ -3911,18 +4025,19 @@
 
   // Try to match immediate forms.
   MachineInstr *ImmedCmp =
-      tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
+      tryOptArithImmedIntegerCompare(CmpLHS, CmpRHS, P, MIRBuilder);
   if (ImmedCmp)
     return {ImmedCmp, P};
 
   // If we don't have an immediate, we may have a shift which can be folded
   // into the compare.
-  MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
+  MachineInstr *ShiftedCmp =
+      tryOptArithShiftedCompare(CmpLHS, CmpRHS, MIRBuilder);
   if (ShiftedCmp)
     return {ShiftedCmp, P};
 
   auto CmpMI =
-      MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
+      MIRBuilder.buildInstr(CmpOpc, {ZReg}, {CmpLHS.getReg(), CmpRHS.getReg()});
   // Make sure that we can constrain the compare that we emitted.
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); return {&*CmpMI, P}; @@ -4203,11 +4318,25 @@ return true; } +bool AArch64InstructionSelector::isCMN(MachineInstr *MI, + const AArch64CC::CondCode &CC, + const MachineRegisterInfo &MRI) const { + if (!MI || MI->getOpcode() != TargetOpcode::G_SUB) + return false; + // Need to make sure NZCV is the same at the end of the transformation. + if (CC != AArch64CC::EQ && CC != AArch64CC::NE) + return false; + + // Match: x = G_SUB 0, y + auto ValAndVReg = + getConstantVRegValWithLookThrough(MI->getOperand(1).getReg(), MRI); + return ValAndVReg && ValAndVReg->Value == 0; +} + MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( - MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, + MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate P, MachineIRBuilder &MIRBuilder) const { - assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && - "Unexpected MachineOperand"); + assert(LHS.isReg() && RHS.isReg() && "Unexpected MachineOperand"); MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); // We want to find this sort of thing: // x = G_SUB 0, y @@ -4218,35 +4347,9 @@ // // cmn z, y - // Helper lambda to detect the subtract followed by the compare. - // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0. - auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) { - if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB) - return false; - - // Need to make sure NZCV is the same at the end of the transformation. - if (CC != AArch64CC::EQ && CC != AArch64CC::NE) - return false; - - // We want to match against SUBs. - if (DefMI->getOpcode() != TargetOpcode::G_SUB) - return false; - - // Make sure that we're getting - // x = G_SUB 0, y - auto ValAndVReg = - getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI); - if (!ValAndVReg || ValAndVReg->Value != 0) - return false; - - // This can safely be represented as a CMN. - return true; - }; - // Check if the RHS or LHS of the G_ICMP is defined by a SUB MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); - CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P); // Given this: @@ -4257,7 +4360,7 @@ // Produce this: // // cmn y, z - if (IsCMN(LHSDef, CC)) + if (isCMN(LHSDef, CC, MRI)) return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); // Same idea here, but with the RHS of the compare instead: @@ -4270,7 +4373,7 @@ // Produce this: // // cmn z, y - if (IsCMN(RHSDef, CC)) + if (isCMN(RHSDef, CC, MRI)) return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); // Given this: Index: llvm/test/CodeGen/AArch64/GlobalISel/select-swap-compare-operands.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/select-swap-compare-operands.mir @@ -0,0 +1,571 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Check that we swap the order of operands on comparisons when it is likely +# to introduce a folding opportunity. +# +# The condition code for the compare should be changed when appropriate. + +... 
+--- +name: swap_sextinreg_lhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: swap_sextinreg_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64 = SBFMXri %reg, 0, 0 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_SEXT_INREG %reg, 1 + %cmp_rhs:gpr(s64) = COPY $x1 + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_more_than_one_use +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; The LHS of the compare is used in an add, and a second compare. Don't + ; swap, since we don't gain any folding opportunities here. + + ; CHECK-LABEL: name: dont_swap_more_than_one_use + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg0:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64 = SBFMXri %reg0, 0, 0 + ; CHECK: %add:gpr64 = ADDXrr %cmp_lhs, %reg0 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %add, implicit-def $nzcv + ; CHECK: %cmp2:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: $w0 = COPY %cmp2 + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_SEXT_INREG %reg0, 1 + %reg1:gpr(s64) = COPY $x1 + %cmp1:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %reg1 + + %add:gpr(s64) = G_ADD %cmp_lhs(s64), %reg0 + %cmp2:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %add + + $w0 = COPY %cmp2(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_legal_arith_immed_on_rhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; Arithmetic immediates can be folded into compares. If we have one, then + ; don't bother changing anything. + + ; CHECK-LABEL: name: dont_swap_legal_arith_immed_on_rhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = SBFMXri %reg, 0, 0 + ; CHECK: $xzr = SUBSXri %cmp_lhs, 12, 0, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_SEXT_INREG %reg, 1 + %cmp_rhs:gpr(s64) = G_CONSTANT i64 12 + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_non_arith_immed_on_rhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; If we have a non-arithmetic immediate on the rhs, then we can swap to get + ; a guaranteed folding opportunity. 
+ + ; CHECK-LABEL: name: swap_non_arith_immed_on_rhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64 = SBFMXri %reg, 0, 0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1234567 + ; CHECK: %cmp_rhs:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_SEXT_INREG %reg, 1 + %cmp_rhs:gpr(s64) = G_CONSTANT i64 1234567 + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_and_lhs_0xFF +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: swap_and_lhs_0xFF + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %and_lhs:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4103 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %and_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 255 + %cmp_lhs:gpr(s64) = G_AND %and_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_and_lhs_0xFFFF +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: swap_and_lhs_0xFFFF + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %and_lhs:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4111 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %cst:gpr(s64) = G_CONSTANT i64 65535 + %and_lhs:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_AND %and_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_and_lhs_0xFFFFFFFF +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: swap_and_lhs_0xFFFFFFFF + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %and_lhs:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4127 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %and_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 4294967295 + %cmp_lhs:gpr(s64) = G_AND %and_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_and_lhs_wrong_mask +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; 7 isn't an extend mask for G_AND, so there's no folding opportunities + ; here. 
+ ; + ; CHECK-LABEL: name: dont_swap_and_lhs_wrong_mask + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %and_lhs:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4098 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %and_lhs:gpr(s64) = COPY $x0 + %not_an_extend_mask:gpr(s64) = G_CONSTANT i64 7 + %cmp_lhs:gpr(s64) = G_AND %and_lhs, %not_an_extend_mask(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_shl_lhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: swap_shl_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %shl_lhs:gpr64 = COPY $x0 + ; CHECK: $xzr = SUBSXrs %cmp_rhs, %shl_lhs, 1, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %shl_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_SHL %shl_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_ashr_lhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: swap_ashr_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %ashr_lhs:gpr64 = COPY $x0 + ; CHECK: $xzr = SUBSXrs %cmp_rhs, %ashr_lhs, 129, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %ashr_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_ASHR %ashr_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_lshr_lhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: swap_lshr_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %lshr_lhs:gpr64 = COPY $x0 + ; CHECK: $xzr = SUBSXrs %cmp_rhs, %lshr_lhs, 65, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %lshr_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_LSHR %lshr_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_shift_s64_cst_too_large +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; Constant for the shift must be <= 63. 
+
+    ; CHECK-LABEL: name: dont_swap_shift_s64_cst_too_large
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %cmp_rhs:gpr64 = COPY $x1
+    ; CHECK: %shl_lhs:gpr64 = COPY $x0
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 64
+    ; CHECK: %too_large:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+    ; CHECK: %cmp_lhs:gpr64 = LSLVXr %shl_lhs, %too_large
+    ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %cmp_rhs:gpr(s64) = COPY $x1
+
+    %shl_lhs:gpr(s64) = COPY $x0
+    %too_large:gpr(s64) = G_CONSTANT i64 64
+    %cmp_lhs:gpr(s64) = G_SHL %shl_lhs, %too_large(s64)
+
+    %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+
+...
+---
+name: dont_swap_shift_s32_cst_too_large
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; Constant for the shift must be <= 31.
+
+    ; CHECK-LABEL: name: dont_swap_shift_s32_cst_too_large
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %cmp_rhs:gpr32 = COPY $w1
+    ; CHECK: %shl_lhs:gpr32 = COPY $w0
+    ; CHECK: %cst:gpr32 = MOVi32imm 32
+    ; CHECK: %cmp_lhs:gpr32 = LSLVWr %shl_lhs, %cst
+    ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %cmp_rhs:gpr(s32) = COPY $w1
+
+    %shl_lhs:gpr(s32) = COPY $w0
+    %cst:gpr(s32) = G_CONSTANT i32 32
+    %cmp_lhs:gpr(s32) = G_SHL %shl_lhs, %cst(s32)
+
+    %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s32), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_swap_cmn_lhs_no_folding_opportunities
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; No reason to swap a CMN on the LHS when it won't introduce a constant
+    ; folding opportunity. We can recognise CMNs on the LHS and RHS, so there's
+    ; nothing to gain here.
+
+    ; CHECK-LABEL: name: dont_swap_cmn_lhs_no_folding_opportunities
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %cmp_rhs:gpr64 = COPY $x1
+    ; CHECK: %sub_rhs:gpr64 = COPY $x0
+    ; CHECK: $xzr = ADDSXrr %sub_rhs, %cmp_rhs, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %cmp_rhs:gpr(s64) = COPY $x1
+
+    %sub_rhs:gpr(s64) = COPY $x0
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %cmp_lhs:gpr(s64) = G_SUB %zero, %sub_rhs
+
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: swap_cmn_lhs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; Swap when we can see a constant folding opportunity through the sub on
+    ; the LHS.
+ + + ; CHECK-LABEL: name: swap_cmn_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %shl_lhs:gpr64 = COPY $x0 + ; CHECK: $xzr = ADDSXrs %cmp_rhs, %shl_lhs, 63, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %shl_lhs:gpr(s64) = COPY $x0 + %zero:gpr(s64) = G_CONSTANT i64 0 + %cst:gpr(s64) = G_CONSTANT i64 63 + %sub_rhs:gpr(s64) = G_SHL %shl_lhs, %cst(s64) + %cmp_lhs:gpr(s64) = G_SUB %zero, %sub_rhs + + %cmp:gpr(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_cmn_lhs_when_rhs_more_profitable +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; Don't swap when the RHS's subtract offers a better constant folding + ; opportunity than the LHS's subtract. + ; + ; In this case, the RHS has a supported extend, plus a shift with a constant + ; <= 4. + + ; CHECK-LABEL: name: dont_swap_cmn_lhs_when_rhs_more_profitable + ; CHECK: liveins: $x0, $x1 + ; CHECK: %zero:gpr64 = COPY $xzr + ; CHECK: %reg0:gpr64 = COPY $x0 + ; CHECK: %shl:gpr64 = UBFMXri %reg0, 1, 0 + ; CHECK: %reg1:gpr64 = COPY $x1 + ; CHECK: %sext_in_reg:gpr64 = SBFMXri %reg1, 0, 0 + ; CHECK: %cmp_rhs:gpr64 = SUBSXrs %zero, %sext_in_reg, 131, implicit-def $nzcv + ; CHECK: $xzr = ADDSXrr %shl, %cmp_rhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %zero:gpr(s64) = G_CONSTANT i64 0 + + %reg0:gpr(s64) = COPY $x0 + %shl_cst:gpr(s64) = G_CONSTANT i64 63 + %shl:gpr(s64) = G_SHL %reg0, %shl_cst(s64) + %cmp_lhs:gpr(s64) = G_SUB %zero, %shl + + %reg1:gpr(s64) = COPY $x1 + %sext_in_reg:gpr(s64) = G_SEXT_INREG %reg1, 1 + %ashr_cst:gpr(s64) = G_CONSTANT i64 3 + %ashr:gpr(s64) = G_ASHR %sext_in_reg, %ashr_cst(s64) + %cmp_rhs:gpr(s64) = G_SUB %zero, %ashr + + %cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_rhs_with_supported_extend +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; The RHS offers more constant folding opportunities than the LHS. + + ; CHECK-LABEL: name: dont_swap_rhs_with_supported_extend + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg0:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64 = UBFMXri %reg0, 63, 62 + ; CHECK: %reg1:gpr64 = COPY $x1 + ; CHECK: %and:gpr64common = ANDXri %reg1, 4103 + ; CHECK: $xzr = SUBSXrs %cmp_lhs, %and, 129, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s64) = COPY $x0 + %lhs_cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_SHL %reg0, %lhs_cst(s64) + + %reg1:gpr(s64) = COPY $x1 + %and_mask:gpr(s64) = G_CONSTANT i64 255 + %and:gpr(s64) = G_AND %reg1, %and_mask(s64) + %rhs_cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_rhs:gpr(s64) = G_ASHR %and, %rhs_cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + + +... +--- +name: swap_rhs_with_supported_extend +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; In this case, both the LHS and RHS are fed by a supported extend. However, + ; the LHS' shift has a constant <= 4. 
This makes it more profitable, so + ; we should swap the operands. + + ; CHECK-LABEL: name: swap_rhs_with_supported_extend + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg0:gpr64 = COPY $x0 + ; CHECK: %and:gpr64common = ANDXri %reg0, 4103 + ; CHECK: %cmp_rhs:gpr64 = SBFMXri %and, 5, 63 + ; CHECK: $xzr = SUBSXrs %cmp_rhs, %and, 1, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s64) = COPY $x0 + %and_mask:gpr(s64) = G_CONSTANT i64 255 + %and:gpr(s64) = G_AND %reg0, %and_mask(s64) + + %lhs_cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_SHL %and, %lhs_cst(s64) + + %rhs_cst:gpr(s64) = G_CONSTANT i64 5 + %cmp_rhs:gpr(s64) = G_ASHR %and, %rhs_cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sgt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 +
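
For readers who want to experiment with the heuristic outside of LLVM, below is a minimal standalone C++ sketch of the scoring and swap decision implemented by getCmpOperandFoldingProfit and trySwapCmpLHSAndRHS in the patch above. Only isLegalArithImmed mirrors the patch directly; OperandProfile, foldingProfit, and shouldSwap are hypothetical names that stand in for walking the MachineRegisterInfo, and the single-use and shift-range checks are deliberately omitted.

// Standalone sketch of the swap heuristic (not LLVM code).
// Build with: clang++ -std=c++17 swap_heuristic.cpp
#include <cstdint>
#include <iostream>
#include <optional>

// Mirrors isLegalArithImmed from the patch: a 12-bit immediate, optionally
// shifted left by 12 bits.
static bool isLegalArithImmed(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

// Hypothetical summary of how a compare operand is defined. In the selector
// this information comes from G_SEXT_INREG/G_AND/G_SHL/G_ASHR/G_LSHR defs.
struct OperandProfile {
  bool HasSupportedExtend = false;     // sext_inreg, or AND with 0xFF/0xFFFF/0xFFFFFFFF
  std::optional<uint64_t> ShiftAmount; // constant shift feeding the compare, if any
  bool ShiftOfExtend = false;          // the shifted value is itself a supported extend
  std::optional<uint64_t> Constant;    // operand is a known constant, if any
};

// Same scoring idea as getCmpOperandFoldingProfit: 1 for a lone extend,
// 2 for a small (<= 4) shift of an extend, otherwise 1 for any other
// foldable shift, 0 when nothing can be folded.
static unsigned foldingProfit(const OperandProfile &Op) {
  if (Op.HasSupportedExtend && !Op.ShiftAmount)
    return 1;
  if (Op.ShiftAmount) {
    if (Op.ShiftOfExtend)
      return *Op.ShiftAmount <= 4 ? 2 : 1;
    return 1;
  }
  return 0;
}

// Mirrors the decision in trySwapCmpLHSAndRHS: never swap away a legal
// arithmetic immediate on the RHS; otherwise swap when the LHS folds better.
static bool shouldSwap(const OperandProfile &LHS, const OperandProfile &RHS) {
  if (RHS.Constant && isLegalArithImmed(*RHS.Constant))
    return false;
  return foldingProfit(LHS) > foldingProfit(RHS);
}

int main() {
  OperandProfile ShiftedLHS;  // e.g. %cmp_lhs = G_SHL %x, 1
  ShiftedLHS.ShiftAmount = 1;
  OperandProfile PlainRHS;    // e.g. %cmp_rhs = COPY $x1
  OperandProfile SmallImmRHS; // e.g. %cmp_rhs = G_CONSTANT i64 12
  SmallImmRHS.Constant = 12;

  std::cout << shouldSwap(ShiftedLHS, PlainRHS) << '\n';    // 1: swap, fold the shift
  std::cout << shouldSwap(ShiftedLHS, SmallImmRHS) << '\n'; // 0: keep the legal immediate
  return 0;
}

Running the sketch prints 1 for the shifted-LHS case (swap so the shift can fold into the compare, as in the swap_shl_lhs test) and 0 when the RHS is already a legal arithmetic immediate (as in dont_swap_legal_arith_immed_on_rhs).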