diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -127,7 +127,14 @@
   (apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }])
 >;
 
-def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;
+def swap_icmp_operands : GICombineRule <
+  (defs root:$root),
+  (match (wip_match_opcode G_ICMP):$root,
+          [{ return trySwapICmpOperands(*${root}, MRI); }]),
+  (apply [{ applySwapICmpOperands(*${root}, Observer); }])
+>;
+
+def icmp_lowering : GICombineGroup<[adjust_icmp_imm, swap_icmp_operands]>;
 
 def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
 def extractvecelt_pairwise_add : GICombineRule<
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
@@ -15,9 +15,12 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/Register.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/IR/InstrTypes.h"
 #include <cstdint>
 
 namespace llvm {
+
 namespace AArch64GISelUtils {
 
 /// \returns true if \p C is a legal immediate operand for an arithmetic
@@ -36,6 +39,11 @@
 Optional<int64_t> getAArch64VectorSplatScalar(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI);
 
+/// \returns true if \p MaybeSub and \p Pred are part of a CMN tree for an
+/// integer compare.
+bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred,
+           const MachineRegisterInfo &MRI);
+
 } // namespace AArch64GISelUtils
 } // namespace llvm
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -29,10 +29,31 @@
   return RegOrConstant(Src);
 }
 
-Optional<int64_t> AArch64GISelUtils::getAArch64VectorSplatScalar(
-    const MachineInstr &MI, const MachineRegisterInfo &MRI) {
+Optional<int64_t>
+AArch64GISelUtils::getAArch64VectorSplatScalar(const MachineInstr &MI,
+                                               const MachineRegisterInfo &MRI) {
   auto Splat = getAArch64VectorSplat(MI, MRI);
   if (!Splat || Splat->isReg())
     return None;
   return Splat->getCst();
 }
+
+bool AArch64GISelUtils::isCMN(const MachineInstr *MaybeSub,
+                              const CmpInst::Predicate &Pred,
+                              const MachineRegisterInfo &MRI) {
+  // Match:
+  //
+  // %sub = G_SUB 0, %y
+  // %cmp = G_ICMP eq/ne, %sub, %z
+  //
+  // Or
+  //
+  // %sub = G_SUB 0, %y
+  // %cmp = G_ICMP eq/ne, %z, %sub
+  if (!MaybeSub || MaybeSub->getOpcode() != TargetOpcode::G_SUB ||
+      !CmpInst::isEquality(Pred))
+    return false;
+  auto MaybeZero =
+      getConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
+  return MaybeZero && MaybeZero->Value.getZExtValue() == 0;
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -18,6 +18,7 @@
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
+#include "AArch64GlobalISelUtils.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/ADT/Optional.h"
@@ -4577,37 +4578,10 @@
   //
   // cmn z, y
 
-  // Helper lambda to detect the subtract followed by the compare.
-  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
-  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
-    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
-      return false;
-
-    // Need to make sure NZCV is the same at the end of the transformation.
-    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
-      return false;
-
-    // We want to match against SUBs.
-    if (DefMI->getOpcode() != TargetOpcode::G_SUB)
-      return false;
-
-    // Make sure that we're getting
-    // x = G_SUB 0, y
-    auto ValAndVReg =
-        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
-    if (!ValAndVReg || ValAndVReg->Value != 0)
-      return false;
-
-    // This can safely be represented as a CMN.
-    return true;
-  };
-
   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
-  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
-  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
-
+  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
   // Given this:
   //
   // x = G_SUB 0, y
@@ -4616,7 +4590,7 @@
   // Produce this:
   //
   // cmn y, z
-  if (IsCMN(LHSDef, CC))
+  if (isCMN(LHSDef, P, MRI))
     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
 
   // Same idea here, but with the RHS of the compare instead:
@@ -4629,7 +4603,7 @@
   // Produce this:
   //
   // cmn z, y
-  if (IsCMN(RHSDef, CC))
+  if (isCMN(RHSDef, P, MRI))
     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
 
   // Given this:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -735,6 +735,113 @@
   return true;
 }
 
+/// \returns how many instructions would be saved by folding a G_ICMP's shift
+/// and/or extension operations.
+static unsigned getCmpOperandFoldingProfit(Register CmpOp,
+                                           const MachineRegisterInfo &MRI) {
+  // No instructions to save if there's more than one use or no uses.
+  if (!MRI.hasOneNonDBGUse(CmpOp))
+    return 0;
+
+  // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
+  auto IsSupportedExtend = [&](const MachineInstr &MI) {
+    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
+      return true;
+    if (MI.getOpcode() != TargetOpcode::G_AND)
+      return false;
+    auto ValAndVReg =
+        getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+    if (!ValAndVReg)
+      return false;
+    uint64_t Mask = ValAndVReg->Value.getZExtValue();
+    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
+  };
+
+  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
+  if (IsSupportedExtend(*Def))
+    return 1;
+
+  unsigned Opc = Def->getOpcode();
+  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
+      Opc != TargetOpcode::G_LSHR)
+    return 0;
+
+  auto MaybeShiftAmt =
+      getConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
+  if (!MaybeShiftAmt)
+    return 0;
+  uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
+  MachineInstr *ShiftLHS =
+      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
+
+  // Check if we can fold an extend and a shift.
+  // FIXME: This is duplicated with the selector. (See:
+  // selectArithExtendedRegister)
+  if (IsSupportedExtend(*ShiftLHS))
+    return (ShiftAmt <= 4) ? 2 : 1;
+
+  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
+  if (Ty.isVector())
+    return 0;
+  unsigned ShiftSize = Ty.getSizeInBits();
+  if ((ShiftSize == 32 && ShiftAmt <= 31) ||
+      (ShiftSize == 64 && ShiftAmt <= 63))
+    return 1;
+  return 0;
+}
+
+/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
+/// instruction \p MI.
+static bool trySwapICmpOperands(MachineInstr &MI,
+                                const MachineRegisterInfo &MRI) {
+  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+  // Swap the operands if it would introduce a profitable folding opportunity.
+  // (e.g. a shift + extend).
+  //
+  // For example:
+  //   lsl w13, w11, #1
+  //   cmp w13, w12
+  // can be turned into:
+  //   cmp w12, w11, lsl #1
+
+  // Don't swap if there's a constant on the RHS, because we know we can fold
+  // that.
+  Register RHS = MI.getOperand(3).getReg();
+  auto RHSCst = getConstantVRegValWithLookThrough(RHS, MRI);
+  if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
+    return false;
+
+  Register LHS = MI.getOperand(2).getReg();
+  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+  auto GetRegForProfit = [&](Register Reg) {
+    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
+  };
+
+  // Don't have a constant on the RHS. If we swap the LHS and RHS of the
+  // compare, would we be able to fold more instructions?
+  Register TheLHS = GetRegForProfit(LHS);
+  Register TheRHS = GetRegForProfit(RHS);
+
+  // If the LHS is more likely to give us a folding opportunity, then swap the
+  // LHS and RHS.
+  return (getCmpOperandFoldingProfit(TheLHS, MRI) >
+          getCmpOperandFoldingProfit(TheRHS, MRI));
+}
+
+static bool applySwapICmpOperands(MachineInstr &MI,
+                                  GISelChangeObserver &Observer) {
+  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+  Register LHS = MI.getOperand(2).getReg();
+  Register RHS = MI.getOperand(3).getReg();
+  Observer.changedInstr(MI);
+  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
+  MI.getOperand(2).setReg(RHS);
+  MI.getOperand(3).setReg(LHS);
+  Observer.changedInstr(MI);
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGILowering.inc"
 #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-swap-compare-operands.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-swap-compare-operands.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-swap-compare-operands.mir
@@ -0,0 +1,737 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=LOWER
+# RUN: llc -mtriple=aarch64 -global-isel -start-before=aarch64-postlegalizer-lowering -stop-after=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=SELECT
+#
+# Check that we swap the order of operands on comparisons when it is likely
+# to introduce a folding opportunity.
+#
+# The condition code for the compare should be changed when appropriate.
+#
+# TODO: emitBinOp doesn't know about selectArithExtendedRegister, so some of
+# these cases don't hit in selection yet.
+
+...
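+# A rough sketch of the scoring done by getCmpOperandFoldingProfit, for
+# reference when reading these tests. The values and register names below are
+# illustrative only and are not part of any test:
+#
+#   %c1:_(s64) = G_CONSTANT i64 1
+#   %ext:_(s64) = G_SEXT_INREG %a, 8     ; supported extend              -> 1
+#   %shl:_(s64) = G_SHL %b, %c1(s64)     ; shift by an in-range constant -> 1
+#   %mix:_(s64) = G_SHL %ext, %c1(s64)   ; extend + shift amount <= 4    -> 2
+#
+# Only single-use compare operands score at all, and a CMN-style operand
+# (G_SUB 0, %x under an eq/ne predicate) is scored by looking through the
+# subtract at %x. The G_ICMP operands are swapped (and the predicate swapped
+# with CmpInst::getSwappedPredicate) only when the LHS scores strictly higher
+# than the RHS, and never when the RHS is already a legal arithmetic immediate.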
+--- +name: swap_sextinreg_lhs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; LOWER-LABEL: name: swap_sextinreg_lhs + ; LOWER: liveins: $x0, $x1 + ; LOWER: %reg:_(s64) = COPY $x0 + ; LOWER: %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_sextinreg_lhs + ; SELECT: liveins: $x0, $x1 + ; SELECT: %reg:gpr64all = COPY $x0 + ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %reg.sub_32 + ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; SELECT: %cmp_rhs:gpr64sp = COPY $x1 + ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 32, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %reg:_(s64) = COPY $x0 + %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8 + %cmp_rhs:_(s64) = COPY $x1 + %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_more_than_one_use +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; The LHS of the compare is used in an add, and a second compare. Don't + ; swap, since we don't gain any folding opportunities here. + + ; LOWER-LABEL: name: dont_swap_more_than_one_use + ; LOWER: liveins: $x0, $x1 + ; LOWER: %reg0:_(s64) = COPY $x0 + ; LOWER: %cmp_lhs:_(s64) = G_SEXT_INREG %reg0, 8 + ; LOWER: %add:_(s64) = G_ADD %cmp_lhs, %reg0 + ; LOWER: %cmp2:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %add + ; LOWER: $w0 = COPY %cmp2(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: dont_swap_more_than_one_use + ; SELECT: liveins: $x0, $x1 + ; SELECT: %reg0:gpr64 = COPY $x0 + ; SELECT: %cmp_lhs:gpr64 = SBFMXri %reg0, 0, 7 + ; SELECT: %add:gpr64 = ADDXrr %cmp_lhs, %reg0 + ; SELECT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %add, implicit-def $nzcv + ; SELECT: %cmp2:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; SELECT: $w0 = COPY %cmp2 + ; SELECT: RET_ReallyLR implicit $w0 + %reg0:_(s64) = COPY $x0 + %cmp_lhs:_(s64) = G_SEXT_INREG %reg0, 8 + %reg1:_(s64) = COPY $x1 + %cmp1:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %reg1 + + %add:_(s64) = G_ADD %cmp_lhs(s64), %reg0 + %cmp2:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %add + + $w0 = COPY %cmp2(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_legal_arith_immed_on_rhs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; Arithmetic immediates can be folded into compares. If we have one, then + ; don't bother changing anything. 
+ + ; LOWER-LABEL: name: dont_swap_legal_arith_immed_on_rhs + ; LOWER: liveins: $x0, $x1 + ; LOWER: %reg:_(s64) = COPY $x0 + ; LOWER: %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8 + ; LOWER: %cmp_rhs:_(s64) = G_CONSTANT i64 12 + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: dont_swap_legal_arith_immed_on_rhs + ; SELECT: liveins: $x0, $x1 + ; SELECT: %reg:gpr64 = COPY $x0 + ; SELECT: %cmp_lhs:gpr64common = SBFMXri %reg, 0, 7 + ; SELECT: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri %cmp_lhs, 12, 0, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %reg:_(s64) = COPY $x0 + %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8 + %cmp_rhs:_(s64) = G_CONSTANT i64 12 + %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_non_arith_immed_on_rhs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; If we have a non-arithmetic immediate on the rhs, then we can swap to get + ; a guaranteed folding opportunity. + + ; LOWER-LABEL: name: swap_non_arith_immed_on_rhs + ; LOWER: liveins: $x0, $x1 + ; LOWER: %reg:_(s64) = COPY $x0 + ; LOWER: %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8 + ; LOWER: %cmp_rhs:_(s64) = G_CONSTANT i64 1234567 + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_non_arith_immed_on_rhs + ; SELECT: liveins: $x0, $x1 + ; SELECT: %reg:gpr64all = COPY $x0 + ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %reg.sub_32 + ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; SELECT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1234567 + ; SELECT: %cmp_rhs:gpr64sp = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 + ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 32, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %reg:_(s64) = COPY $x0 + %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8 + %cmp_rhs:_(s64) = G_CONSTANT i64 1234567 + %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... 
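+# Note on the two immediate tests above: isLegalArithImmed accepts the add/sub
+# immediate form, i.e. an unsigned 12-bit value optionally shifted left by 12,
+# so such compares keep their immediate and swapping would only lose it.
+# For instance (values here are illustrative, only 12 and 1234567 are tested):
+#
+#   12      (0xC)      -> legal arithmetic immediate, don't swap
+#   4095    (0xFFF)    -> legal (fits in 12 bits), don't swap
+#   8192    (0x2000)   -> legal (2 << 12), don't swap
+#   1234567 (0x12D687) -> not encodable, so a swap can still pay off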
+--- +name: swap_and_lhs_0xFF +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; LOWER-LABEL: name: swap_and_lhs_0xFF + ; LOWER: liveins: $x0, $x1 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %and_lhs:_(s64) = COPY $x0 + ; LOWER: %cst:_(s64) = G_CONSTANT i64 255 + ; LOWER: %cmp_lhs:_(s64) = G_AND %and_lhs, %cst + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_and_lhs_0xFF + ; SELECT: liveins: $x0, $x1 + ; SELECT: %cmp_rhs:gpr64sp = COPY $x1 + ; SELECT: %and_lhs:gpr64all = COPY $x0 + ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %and_lhs.sub_32 + ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 0, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %cmp_rhs:_(s64) = COPY $x1 + + %and_lhs:_(s64) = COPY $x0 + %cst:_(s64) = G_CONSTANT i64 255 + %cmp_lhs:_(s64) = G_AND %and_lhs, %cst(s64) + + %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_and_lhs_0xFFFF +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; LOWER-LABEL: name: swap_and_lhs_0xFFFF + ; LOWER: liveins: $x0, $x1 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %cst:_(s64) = G_CONSTANT i64 65535 + ; LOWER: %and_lhs:_(s64) = COPY $x0 + ; LOWER: %cmp_lhs:_(s64) = G_AND %and_lhs, %cst + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_and_lhs_0xFFFF + ; SELECT: liveins: $x0, $x1 + ; SELECT: %cmp_rhs:gpr64sp = COPY $x1 + ; SELECT: %and_lhs:gpr64all = COPY $x0 + ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %and_lhs.sub_32 + ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 8, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %cmp_rhs:_(s64) = COPY $x1 + + %cst:_(s64) = G_CONSTANT i64 65535 + %and_lhs:_(s64) = COPY $x0 + %cmp_lhs:_(s64) = G_AND %and_lhs, %cst(s64) + + %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: swap_and_lhs_0xFFFFFFFF +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; LOWER-LABEL: name: swap_and_lhs_0xFFFFFFFF + ; LOWER: liveins: $x0, $x1 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %and_lhs:_(s64) = COPY $x0 + ; LOWER: %cst:_(s64) = G_CONSTANT i64 4294967295 + ; LOWER: %cmp_lhs:_(s64) = G_AND %and_lhs, %cst + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_and_lhs_0xFFFFFFFF + ; SELECT: liveins: $x0, $x1 + ; SELECT: %cmp_rhs:gpr64sp = COPY $x1 + ; SELECT: %and_lhs:gpr64all = COPY $x0 + ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %and_lhs.sub_32 + ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 16, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %cmp_rhs:_(s64) = COPY $x1 + + %and_lhs:_(s64) = COPY $x0 + %cst:_(s64) = G_CONSTANT i64 4294967295 + %cmp_lhs:_(s64) = G_AND %and_lhs, %cst(s64) + + %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_and_lhs_wrong_mask +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; 7 isn't an extend mask for G_AND, so there's no folding opportunities + ; here. + ; + ; LOWER-LABEL: name: dont_swap_and_lhs_wrong_mask + ; LOWER: liveins: $x0, $x1 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %and_lhs:_(s64) = COPY $x0 + ; LOWER: %not_an_extend_mask:_(s64) = G_CONSTANT i64 7 + ; LOWER: %cmp_lhs:_(s64) = G_AND %and_lhs, %not_an_extend_mask + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: dont_swap_and_lhs_wrong_mask + ; SELECT: liveins: $x0, $x1 + ; SELECT: %cmp_rhs:gpr64 = COPY $x1 + ; SELECT: %and_lhs:gpr64 = COPY $x0 + ; SELECT: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4098 + ; SELECT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %cmp_rhs:_(s64) = COPY $x1 + + %and_lhs:_(s64) = COPY $x0 + %not_an_extend_mask:_(s64) = G_CONSTANT i64 7 + %cmp_lhs:_(s64) = G_AND %and_lhs, %not_an_extend_mask(s64) + + %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... 
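+# The three masks accepted above (0xFF, 0xFFFF, 0xFFFFFFFF) are exactly the
+# G_AND masks the selector can fold as the uxtb/uxth/uxtw extended-register
+# operand of the compare, e.g. (illustrative only, not a test):
+#
+#   %mask:_(s64) = G_CONSTANT i64 255
+#   %cmp_lhs:_(s64) = G_AND %x, %mask    ; foldable as: cmp x1, w0, uxtb
+#
+# Any other mask, like the 7 in the test above, is not an extend pattern, so
+# there is nothing to fold and no reason to swap.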
+--- +name: swap_shl_lhs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; LOWER-LABEL: name: swap_shl_lhs + ; LOWER: liveins: $x0, $x1 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %shl_lhs:_(s64) = COPY $x0 + ; LOWER: %cst:_(s64) = G_CONSTANT i64 1 + ; LOWER: %cmp_lhs:_(s64) = G_SHL %shl_lhs, %cst(s64) + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sgt), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_shl_lhs + ; SELECT: liveins: $x0, $x1 + ; SELECT: %cmp_rhs:gpr64 = COPY $x1 + ; SELECT: %shl_lhs:gpr64 = COPY $x0 + ; SELECT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %cmp_rhs, %shl_lhs, 1, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %cmp_rhs:_(s64) = COPY $x1 + + %shl_lhs:_(s64) = COPY $x0 + %cst:_(s64) = G_CONSTANT i64 1 + %cmp_lhs:_(s64) = G_SHL %shl_lhs, %cst(s64) + + %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_ashr_lhs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; LOWER-LABEL: name: swap_ashr_lhs + ; LOWER: liveins: $x0, $x1 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %ashr_lhs:_(s64) = COPY $x0 + ; LOWER: %cst:_(s64) = G_CONSTANT i64 1 + ; LOWER: %cmp_lhs:_(s64) = G_ASHR %ashr_lhs, %cst(s64) + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sgt), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_ashr_lhs + ; SELECT: liveins: $x0, $x1 + ; SELECT: %cmp_rhs:gpr64 = COPY $x1 + ; SELECT: %ashr_lhs:gpr64 = COPY $x0 + ; SELECT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %cmp_rhs, %ashr_lhs, 129, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %cmp_rhs:_(s64) = COPY $x1 + + %ashr_lhs:_(s64) = COPY $x0 + %cst:_(s64) = G_CONSTANT i64 1 + %cmp_lhs:_(s64) = G_ASHR %ashr_lhs, %cst(s64) + + %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_lshr_lhs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; LOWER-LABEL: name: swap_lshr_lhs + ; LOWER: liveins: $x0, $x1 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %lshr_lhs:_(s64) = COPY $x0 + ; LOWER: %cst:_(s64) = G_CONSTANT i64 1 + ; LOWER: %cmp_lhs:_(s64) = G_LSHR %lshr_lhs, %cst(s64) + ; LOWER: %cmp:_(s32) = G_ICMP intpred(sgt), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_lshr_lhs + ; SELECT: liveins: $x0, $x1 + ; SELECT: %cmp_rhs:gpr64 = COPY $x1 + ; SELECT: %lshr_lhs:gpr64 = COPY $x0 + ; SELECT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %cmp_rhs, %lshr_lhs, 65, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %cmp_rhs:_(s64) = COPY $x1 + + %lshr_lhs:_(s64) = COPY $x0 + %cst:_(s64) = G_CONSTANT i64 1 + %cmp_lhs:_(s64) = G_LSHR %lshr_lhs, %cst(s64) + + %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_shift_s64_cst_too_large +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; Constant for the shift must be <= 63. 
+
+    ; LOWER-LABEL: name: dont_swap_shift_s64_cst_too_large
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %shl_lhs:_(s64) = COPY $x0
+    ; LOWER: %too_large:_(s64) = G_CONSTANT i64 64
+    ; LOWER: %cmp_lhs:_(s64) = G_SHL %shl_lhs, %too_large(s64)
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_shift_s64_cst_too_large
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64 = COPY $x1
+    ; SELECT: %shl_lhs:gpr64 = COPY $x0
+    ; SELECT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 64
+    ; SELECT: %too_large:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+    ; SELECT: %cmp_lhs:gpr64 = LSLVXr %shl_lhs, %too_large
+    ; SELECT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %shl_lhs:_(s64) = COPY $x0
+    %too_large:_(s64) = G_CONSTANT i64 64
+    %cmp_lhs:_(s64) = G_SHL %shl_lhs, %too_large(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+
+...
+---
+name: dont_swap_shift_s32_cst_too_large
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; Constant for the shift must be <= 31.
+
+    ; LOWER-LABEL: name: dont_swap_shift_s32_cst_too_large
+    ; LOWER: liveins: $w0, $w1
+    ; LOWER: %cmp_rhs:_(s32) = COPY $w1
+    ; LOWER: %shl_lhs:_(s32) = COPY $w0
+    ; LOWER: %cst:_(s32) = G_CONSTANT i32 32
+    ; LOWER: %cmp_lhs:_(s32) = G_SHL %shl_lhs, %cst(s32)
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s32), %cmp_rhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_shift_s32_cst_too_large
+    ; SELECT: liveins: $w0, $w1
+    ; SELECT: %cmp_rhs:gpr32 = COPY $w1
+    ; SELECT: %shl_lhs:gpr32 = COPY $w0
+    ; SELECT: %cst:gpr32 = MOVi32imm 32
+    ; SELECT: %cmp_lhs:gpr32 = LSLVWr %shl_lhs, %cst
+    ; SELECT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s32) = COPY $w1
+
+    %shl_lhs:_(s32) = COPY $w0
+    %cst:_(s32) = G_CONSTANT i32 32
+    %cmp_lhs:_(s32) = G_SHL %shl_lhs, %cst(s32)
+
+    %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s32), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_swap_cmn_lhs_no_folding_opportunities
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; No reason to swap a CMN on the LHS when it won't introduce a constant
+    ; folding opportunity. We can recognise CMNs on the LHS and RHS, so there's
+    ; nothing to gain here.
+ + ; LOWER-LABEL: name: dont_swap_cmn_lhs_no_folding_opportunities + ; LOWER: liveins: $x0, $x1 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %sub_rhs:_(s64) = COPY $x0 + ; LOWER: %zero:_(s64) = G_CONSTANT i64 0 + ; LOWER: %cmp_lhs:_(s64) = G_SUB %zero, %sub_rhs + ; LOWER: %cmp:_(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: dont_swap_cmn_lhs_no_folding_opportunities + ; SELECT: liveins: $x0, $x1 + ; SELECT: %cmp_rhs:gpr64 = COPY $x1 + ; SELECT: %sub_rhs:gpr64 = COPY $x0 + ; SELECT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr %sub_rhs, %cmp_rhs, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %cmp_rhs:_(s64) = COPY $x1 + + %sub_rhs:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %cmp_lhs:_(s64) = G_SUB %zero, %sub_rhs + + %cmp:_(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_cmn_lhs +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; Swap when we can see a constant folding opportunity through the sub on + ; the LHS. + + + ; LOWER-LABEL: name: swap_cmn_lhs + ; LOWER: liveins: $x0, $x1 + ; LOWER: %cmp_rhs:_(s64) = COPY $x1 + ; LOWER: %shl_lhs:_(s64) = COPY $x0 + ; LOWER: %zero:_(s64) = G_CONSTANT i64 0 + ; LOWER: %cst:_(s64) = G_CONSTANT i64 63 + ; LOWER: %sub_rhs:_(s64) = G_SHL %shl_lhs, %cst(s64) + ; LOWER: %cmp_lhs:_(s64) = G_SUB %zero, %sub_rhs + ; LOWER: %cmp:_(s32) = G_ICMP intpred(ne), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_cmn_lhs + ; SELECT: liveins: $x0, $x1 + ; SELECT: %cmp_rhs:gpr64 = COPY $x1 + ; SELECT: %shl_lhs:gpr64 = COPY $x0 + ; SELECT: [[ADDSXrs:%[0-9]+]]:gpr64 = ADDSXrs %cmp_rhs, %shl_lhs, 63, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %cmp_rhs:_(s64) = COPY $x1 + + %shl_lhs:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %cst:_(s64) = G_CONSTANT i64 63 + %sub_rhs:_(s64) = G_SHL %shl_lhs, %cst(s64) + %cmp_lhs:_(s64) = G_SUB %zero, %sub_rhs + + %cmp:_(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_cmn_lhs_when_rhs_more_profitable +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; Don't swap when the RHS's subtract offers a better constant folding + ; opportunity than the LHS's subtract. + ; + ; In this case, the RHS has a supported extend, plus a shift with a constant + ; <= 4. 
+ + ; LOWER-LABEL: name: dont_swap_cmn_lhs_when_rhs_more_profitable + ; LOWER: liveins: $x0, $x1 + ; LOWER: %zero:_(s64) = G_CONSTANT i64 0 + ; LOWER: %reg0:_(s64) = COPY $x0 + ; LOWER: %shl_cst:_(s64) = G_CONSTANT i64 63 + ; LOWER: %shl:_(s64) = G_SHL %reg0, %shl_cst(s64) + ; LOWER: %cmp_lhs:_(s64) = G_SUB %zero, %shl + ; LOWER: %reg1:_(s64) = COPY $x1 + ; LOWER: %sext_in_reg:_(s64) = G_SEXT_INREG %reg1, 1 + ; LOWER: %ashr_cst:_(s64) = G_CONSTANT i64 3 + ; LOWER: %ashr:_(s64) = G_ASHR %sext_in_reg, %ashr_cst(s64) + ; LOWER: %cmp_rhs:_(s64) = G_SUB %zero, %ashr + ; LOWER: %cmp:_(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: dont_swap_cmn_lhs_when_rhs_more_profitable + ; SELECT: liveins: $x0, $x1 + ; SELECT: %reg0:gpr64 = COPY $x0 + ; SELECT: %shl:gpr64 = UBFMXri %reg0, 1, 0 + ; SELECT: %zero:gpr64 = COPY $xzr + ; SELECT: %reg1:gpr64 = COPY $x1 + ; SELECT: %sext_in_reg:gpr64 = SBFMXri %reg1, 0, 0 + ; SELECT: %cmp_rhs:gpr64 = SUBSXrs %zero, %sext_in_reg, 131, implicit-def $nzcv + ; SELECT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr %shl, %cmp_rhs, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %zero:_(s64) = G_CONSTANT i64 0 + + %reg0:_(s64) = COPY $x0 + %shl_cst:_(s64) = G_CONSTANT i64 63 + %shl:_(s64) = G_SHL %reg0, %shl_cst(s64) + %cmp_lhs:_(s64) = G_SUB %zero, %shl + + %reg1:_(s64) = COPY $x1 + %sext_in_reg:_(s64) = G_SEXT_INREG %reg1, 1 + %ashr_cst:_(s64) = G_CONSTANT i64 3 + %ashr:_(s64) = G_ASHR %sext_in_reg, %ashr_cst(s64) + %cmp_rhs:_(s64) = G_SUB %zero, %ashr + + %cmp:_(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_rhs_with_supported_extend +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; The RHS offers more constant folding opportunities than the LHS. + + ; LOWER-LABEL: name: dont_swap_rhs_with_supported_extend + ; LOWER: liveins: $x0, $x1 + ; LOWER: %reg0:_(s64) = COPY $x0 + ; LOWER: %lhs_cst:_(s64) = G_CONSTANT i64 1 + ; LOWER: %cmp_lhs:_(s64) = G_SHL %reg0, %lhs_cst(s64) + ; LOWER: %reg1:_(s64) = COPY $x1 + ; LOWER: %and_mask:_(s64) = G_CONSTANT i64 255 + ; LOWER: %and:_(s64) = G_AND %reg1, %and_mask + ; LOWER: %rhs_cst:_(s64) = G_CONSTANT i64 1 + ; LOWER: %cmp_rhs:_(s64) = G_ASHR %and, %rhs_cst(s64) + ; LOWER: %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: dont_swap_rhs_with_supported_extend + ; SELECT: liveins: $x0, $x1 + ; SELECT: %reg0:gpr64 = COPY $x0 + ; SELECT: %cmp_lhs:gpr64 = UBFMXri %reg0, 63, 62 + ; SELECT: %reg1:gpr64 = COPY $x1 + ; SELECT: %and:gpr64common = ANDXri %reg1, 4103 + ; SELECT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %cmp_lhs, %and, 129, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %reg0:_(s64) = COPY $x0 + %lhs_cst:_(s64) = G_CONSTANT i64 1 + %cmp_lhs:_(s64) = G_SHL %reg0, %lhs_cst(s64) + + %reg1:_(s64) = COPY $x1 + %and_mask:_(s64) = G_CONSTANT i64 255 + %and:_(s64) = G_AND %reg1, %and_mask(s64) + %rhs_cst:_(s64) = G_CONSTANT i64 1 + %cmp_rhs:_(s64) = G_ASHR %and, %rhs_cst(s64) + + %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + + +... 
+--- +name: swap_rhs_with_supported_extend +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; In this case, both the LHS and RHS are fed by a supported extend. However, + ; the LHS' shift has a constant <= 4. This makes it more profitable, so + ; we should swap the operands. + + ; LOWER-LABEL: name: swap_rhs_with_supported_extend + ; LOWER: liveins: $x0, $x1 + ; LOWER: %reg0:_(s64) = COPY $x0 + ; LOWER: %and_mask:_(s64) = G_CONSTANT i64 255 + ; LOWER: %and:_(s64) = G_AND %reg0, %and_mask + ; LOWER: %lhs_cst:_(s64) = G_CONSTANT i64 1 + ; LOWER: %cmp_lhs:_(s64) = G_SHL %and, %lhs_cst(s64) + ; LOWER: %rhs_cst:_(s64) = G_CONSTANT i64 5 + ; LOWER: %cmp_rhs:_(s64) = G_ASHR %and, %rhs_cst(s64) + ; LOWER: %cmp:_(s32) = G_ICMP intpred(slt), %cmp_rhs(s64), %cmp_lhs + ; LOWER: $w0 = COPY %cmp(s32) + ; LOWER: RET_ReallyLR implicit $w0 + ; SELECT-LABEL: name: swap_rhs_with_supported_extend + ; SELECT: liveins: $x0, $x1 + ; SELECT: %reg0:gpr64 = COPY $x0 + ; SELECT: %and:gpr64common = ANDXri %reg0, 4103 + ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %reg0.sub_32 + ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; SELECT: %cmp_rhs:gpr64common = SBFMXri %and, 5, 63 + ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 1, implicit-def $nzcv + ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; SELECT: $w0 = COPY %cmp + ; SELECT: RET_ReallyLR implicit $w0 + %reg0:_(s64) = COPY $x0 + %and_mask:_(s64) = G_CONSTANT i64 255 + %and:_(s64) = G_AND %reg0, %and_mask(s64) + + %lhs_cst:_(s64) = G_CONSTANT i64 1 + %cmp_lhs:_(s64) = G_SHL %and, %lhs_cst(s64) + + %rhs_cst:_(s64) = G_CONSTANT i64 5 + %cmp_rhs:_(s64) = G_ASHR %and, %rhs_cst(s64) + + %cmp:_(s32) = G_ICMP intpred(sgt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0