diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -276,13 +276,9 @@ const RegisterBank &DstRB, LLT ScalarTy, Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const; - - /// Emit a CSet for an integer compare. - /// - /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers. - MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred, - MachineIRBuilder &MIRBuilder, - Register SrcReg = AArch64::WZR) const; + /// Emit a CSINC instruction and constrain its operands. + MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2, + AArch64CC::CondCode Pred, + MachineIRBuilder &MIRBuilder) const; /// Emit a CSet for a FP compare. /// /// \p Dst is expected to be a 32-bit scalar register. @@ -2213,27 +2209,55 @@ // fold the add into the cset for the cmp by using cinc. // // FIXME: This would probably be a lot nicer in PostLegalizerLowering. + // E.g. %add = G_ADD (G_ICMP pred, %x, %y), %z is selected as a compare + // setting NZCV followed by %add = CSINC %z, %z, inv(pred). - Register X = I.getOperand(1).getReg(); - - // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out - // early if we see it. - LLT Ty = MRI.getType(X); - if (Ty.isVector() || Ty.getSizeInBits() != 32) + Register AddDst = I.getOperand(0).getReg(); + Register AddLHS = I.getOperand(1).getReg(); + Register AddRHS = I.getOperand(2).getReg(); + // Only handle scalars. + LLT Ty = MRI.getType(AddLHS); + if (Ty.isVector()) return false; - - Register CmpReg = I.getOperand(2).getReg(); - MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI); + // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64 + // bits. + unsigned Size = Ty.getSizeInBits(); + if (Size != 32 && Size != 64) + return false; + auto MatchCmp = [&](Register Reg) -> MachineInstr * { + if (!MRI.hasOneNonDBGUse(Reg)) + return nullptr; + // If the LHS of the add is 32 bits, then we want to fold a 32-bit + // compare. + if (Size == 32) + return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI); + // We model scalar compares using 32-bit destinations right now. + // If it's a 64-bit compare, it'll have 64-bit sources. + Register ZExt; + if (!mi_match(Reg, MRI, + m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt)))))) + return nullptr; + auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI); + if (!Cmp || + MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64) + return nullptr; + return Cmp; + }; + // Try to match + // z + (cmp pred, x, y) + MachineInstr *Cmp = MatchCmp(AddRHS); if (!Cmp) { - std::swap(X, CmpReg); - Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI); + // (cmp pred, x, y) + z + std::swap(AddLHS, AddRHS); + Cmp = MatchCmp(AddRHS); if (!Cmp) return false; } - auto Pred = - static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate()); - emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3), - Cmp->getOperand(1), MIB); - emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X); + auto &PredOp = Cmp->getOperand(1); + auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate()); + const AArch64CC::CondCode InvCC = + changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred)); + MIB.setInstrAndDebugLoc(I); + emitIntegerCompare(/*LHS=*/Cmp->getOperand(2), + /*RHS=*/Cmp->getOperand(3), PredOp, MIB); + emitCSINC(/*Dst=*/AddDst, /*Src1=*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB); I.eraseFromParent(); return true; } @@ -2963,10 +2987,8 @@ // false, so to get the increment when it's true, we need to use the // inverse. In this case, we want to increment when carry is set.
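+ // I.e. CSINC Rd, Rn, Rm, cc computes Rd = cc ? Rn : Rm + 1, so with both + // sources tied to WZR the inverted code yields 1 exactly when carry is set.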
Register ZReg = AArch64::WZR; - auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, - {ZReg, ZReg}) - .addImm(getInvertedCondCode(OpAndCC.second)); - constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); + emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg, + getInvertedCondCode(OpAndCC.second), MIB); I.eraseFromParent(); return true; } @@ -3303,9 +3325,11 @@ } auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); - emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), - MIB); - emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB); + const AArch64CC::CondCode InvCC = + changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred)); + emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB); + emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR, + /*Src2=*/AArch64::WZR, InvCC, MIB); I.eraseFromParent(); return true; } @@ -4451,25 +4475,19 @@ assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && "Expected a 32-bit scalar register?"); #endif - const Register ZeroReg = AArch64::WZR; - auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { - auto CSet = - MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) - .addImm(getInvertedCondCode(CC)); - constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); - return &*CSet; - }; - + const Register ZReg = AArch64::WZR; AArch64CC::CondCode CC1, CC2; changeFCMPPredToAArch64CC(Pred, CC1, CC2); + auto InvCC1 = AArch64CC::getInvertedCondCode(CC1); if (CC2 == AArch64CC::AL) - return EmitCSet(Dst, CC1); - + return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, + MIRBuilder); const TargetRegisterClass *RC = &AArch64::GPR32RegClass; Register Def1Reg = MRI.createVirtualRegister(RC); Register Def2Reg = MRI.createVirtualRegister(RC); - EmitCSet(Def1Reg, CC1); - EmitCSet(Def2Reg, CC2); + auto InvCC2 = AArch64CC::getInvertedCondCode(CC2); + emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder); + emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder); auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); return &*OrMI; @@ -4578,16 +4596,25 @@ } MachineInstr * -AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, - MachineIRBuilder &MIRBuilder, - Register SrcReg) const { - // CSINC increments the result when the predicate is false. Invert it. - const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC( - CmpInst::getInversePredicate((CmpInst::Predicate)Pred)); - auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg}) - .addImm(InvCC); - constrainSelectedInstRegOperands(*I, TII, TRI, RBI); - return &*I; +AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1, + Register Src2, AArch64CC::CondCode Pred, + MachineIRBuilder &MIRBuilder) const { + auto &MRI = *MIRBuilder.getMRI(); + const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst); + // If we used a register class, then this won't necessarily have an LLT. + // Compute the size based off whether or not we have a class or bank. + unsigned Size; + if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) + Size = TRI.getRegSizeInBits(*RC); + else + Size = MRI.getType(Dst).getSizeInBits(); + // Some opcodes use s1.
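+ // So, don't require exactly 32 or 64 bits here; anything narrower than 64 + // bits selects the W form from the table below.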
+ assert(Size <= 64 && "Expected 64 bits or less only!"); + static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr}; + unsigned Opc = OpcTable[Size == 64]; + auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred); + constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI); + return &*CSINC; } std::pair<MachineInstr *, AArch64CC::CondCode> diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir @@ -11,11 +11,12 @@ ; CHECK-LABEL: name: cmp_imm_32 ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 - ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv - ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv - ; CHECK: $w0 = COPY [[CSINCWr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr(s32) = COPY $w0 %1:gpr(s32) = G_CONSTANT i32 42 %5:gpr(s32) = G_ICMP intpred(eq), %0(s32), %1 @@ -34,11 +35,12 @@ ; CHECK-LABEL: name: cmp_imm_64 ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY]], 42, 0, implicit-def $nzcv - ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv - ; CHECK: $w0 = COPY [[CSINCWr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY]], 42, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 42 %5:gpr(s32) = G_ICMP intpred(eq), %0(s64), %1 @@ -57,13 +59,14 @@ ; CHECK-LABEL: name: cmp_imm_out_of_range ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 13132 - ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 - ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[SUBREG_TO_REG]], implicit-def $nzcv - ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv - ; CHECK: $w0 = COPY [[CSINCWr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 13132 + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[SUBREG_TO_REG]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 13132 %5:gpr(s32) = G_ICMP intpred(eq), %0(s64), %1 @@ -81,11 +84,12 @@ liveins: $w0 ; CHECK-LABEL: name: cmp_imm_lookthrough ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 - ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv - ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK: $w0 = COPY [[CSINCWr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr(s32) = COPY $w0 %1:gpr(s64) = G_CONSTANT i64 42 %2:gpr(s32) = G_TRUNC %1(s64) @@ -104,11 +108,12 @@ liveins: $w0 ; CHECK-LABEL: name: cmp_imm_lookthrough_bad_trunc ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 - ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv - ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv - ; CHECK: $w0 = COPY [[CSINCWr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr(s32) = COPY $w0 %1:gpr(s64) = G_CONSTANT i64 68719476736 ; 0x1000000000 %2:gpr(s32) = G_TRUNC %1(s64) ; Value truncates to 0 @@ -127,11 +132,12 @@ liveins: $w0 ; CHECK-LABEL: name: cmp_neg_imm_32 ; CHECK: liveins: $w0 - ; CHECK: %reg0:gpr32sp = COPY $w0 - ; CHECK: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv - ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv - ; CHECK: $w0 = COPY %cmp - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg0:gpr32sp = COPY $w0 + ; CHECK-NEXT: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv + ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY %cmp + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %reg0:gpr(s32) = COPY $w0 %cst:gpr(s32) = G_CONSTANT i32 -10 %cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst @@ -149,11 +155,12 @@ liveins: $x0 ; CHECK-LABEL: name: cmp_neg_imm_64 ; CHECK: liveins: $x0 - ; CHECK: %reg0:gpr64sp = COPY $x0 - ; CHECK: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv - ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv - ; CHECK: $w0 = COPY %cmp - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv + ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY %cmp + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %reg0:gpr(s64) = COPY $x0 %cst:gpr(s64) = G_CONSTANT i64 -10 %cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s64), %cst @@ -171,12 +178,13 @@ liveins: $w0 ; CHECK-LABEL: name: cmp_neg_imm_invalid ; CHECK: liveins: $w0 - ; CHECK: %reg0:gpr32 = COPY $w0 - ; CHECK: %cst:gpr32 = MOVi32imm -5000 - ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv - ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv - ; CHECK: $w0 = COPY %cmp - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg0:gpr32 = COPY $w0 + ; CHECK-NEXT: %cst:gpr32 = MOVi32imm -5000 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv + ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY %cmp + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %reg0:gpr(s32) = COPY $w0 %cst:gpr(s32) = 
G_CONSTANT i32 -5000 %cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst @@ -194,12 +202,13 @@ ; CHECK-LABEL: name: cmp_arith_extended_s64 ; CHECK: liveins: $w0, $x1 - ; CHECK: %reg0:gpr32 = COPY $w0 - ; CHECK: %reg1:gpr64sp = COPY $x1 - ; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv - ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv - ; CHECK: $w0 = COPY %cmp - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg0:gpr32 = COPY $w0 + ; CHECK-NEXT: %reg1:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv + ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY %cmp + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %reg0:gpr(s32) = COPY $w0 %reg1:gpr(s64) = COPY $x1 %ext:gpr(s64) = G_ZEXT %reg0(s32) @@ -221,14 +230,15 @@ ; CHECK-LABEL: name: cmp_arith_extended_s32 ; CHECK: liveins: $w0, $w1, $h0 - ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub - ; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]] - ; CHECK: %reg1:gpr32sp = COPY $w1 - ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0 - ; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv - ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv - ; CHECK: $w0 = COPY %cmp - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub + ; CHECK-NEXT: %reg0:gpr32all = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: %reg1:gpr32sp = COPY $w1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0 + ; CHECK-NEXT: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv + ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY %cmp + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %reg0:gpr(s16) = COPY $h0 %reg1:gpr(s32) = COPY $w1 %ext:gpr(s32) = G_ZEXT %reg0(s16) @@ -252,14 +262,15 @@ ; CHECK-LABEL: name: cmp_arith_extended_shl_too_large ; CHECK: liveins: $w0, $x1 - ; CHECK: %reg0:gpr32 = COPY $w0 - ; CHECK: %reg1:gpr64 = COPY $x1 - ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg0, 0 - ; CHECK: %ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32 - ; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv - ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv - ; CHECK: $w0 = COPY %cmp - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %reg0:gpr32 = COPY $w0 + ; CHECK-NEXT: %reg1:gpr64 = COPY $x1 + ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg0, 0 + ; CHECK-NEXT: %ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32 + ; CHECK-NEXT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv + ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY %cmp + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %reg0:gpr(s32) = COPY $w0 %reg1:gpr(s64) = COPY $x1 %ext:gpr(s64) = G_ZEXT %reg0(s32) @@ -284,13 +295,14 @@ ; CHECK-LABEL: name: cmp_add_rhs ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %cmp_lhs:gpr32 = COPY $w0 - ; CHECK: %cmp_rhs:gpr32 = COPY $w1 - ; CHECK: %add_rhs:gpr32 = COPY $w2 - ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv - ; CHECK: %add:gpr32 = CSINCWr %add_rhs, %add_rhs, 1, implicit $nzcv - ; CHECK: $w0 = COPY %add - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cmp_lhs:gpr32 = COPY $w0 + ; CHECK-NEXT: %cmp_rhs:gpr32 = COPY $w1 + 
; CHECK-NEXT: %add_rhs:gpr32 = COPY $w2 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK-NEXT: %add:gpr32 = CSINCWr %add_rhs, %add_rhs, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY %add + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %cmp_lhs:gpr(s32) = COPY $w0 %cmp_rhs:gpr(s32) = COPY $w1 %add_rhs:gpr(s32) = COPY $w2 @@ -314,13 +326,14 @@ ; CHECK-LABEL: name: cmp_add_lhs ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %cmp_lhs:gpr32 = COPY $w0 - ; CHECK: %cmp_rhs:gpr32 = COPY $w1 - ; CHECK: %add_lhs:gpr32 = COPY $w2 - ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv - ; CHECK: %add:gpr32 = CSINCWr %add_lhs, %add_lhs, 1, implicit $nzcv - ; CHECK: $w0 = COPY %add - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cmp_lhs:gpr32 = COPY $w0 + ; CHECK-NEXT: %cmp_rhs:gpr32 = COPY $w1 + ; CHECK-NEXT: %add_lhs:gpr32 = COPY $w2 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK-NEXT: %add:gpr32 = CSINCWr %add_lhs, %add_lhs, 1, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY %add + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %cmp_lhs:gpr(s32) = COPY $w0 %cmp_rhs:gpr(s32) = COPY $w1 %add_lhs:gpr(s32) = COPY $w2 @@ -344,13 +357,14 @@ ; CHECK-LABEL: name: cmp_add_lhs_vector ; CHECK: liveins: $q0, $q1, $q2 - ; CHECK: %cmp_lhs:fpr128 = COPY $q0 - ; CHECK: %cmp_rhs:fpr128 = COPY $q1 - ; CHECK: %add_lhs:fpr128 = COPY $q2 - ; CHECK: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs - ; CHECK: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]] - ; CHECK: $q0 = COPY %add - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cmp_lhs:fpr128 = COPY $q0 + ; CHECK-NEXT: %cmp_rhs:fpr128 = COPY $q1 + ; CHECK-NEXT: %add_lhs:fpr128 = COPY $q2 + ; CHECK-NEXT: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs + ; CHECK-NEXT: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]] + ; CHECK-NEXT: $q0 = COPY %add + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %cmp_lhs:fpr(<4 x s32>) = COPY $q0 %cmp_rhs:fpr(<4 x s32>) = COPY $q1 %add_lhs:fpr(<4 x s32>) = COPY $q2 @@ -358,3 +372,108 @@ %add:fpr(<4 x s32>) = G_ADD %add_lhs, %cmp $q0 = COPY %add(<4 x s32>) RET_ReallyLR implicit $q0 + +... +--- +name: cmp_add_rhs_64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $x2 + + ; The CSINC should use the add's RHS. + ; CHECK-LABEL: name: cmp_add_rhs_64 + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0 + ; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK-NEXT: %add:gpr64 = CSINCXr %add_rhs, %add_rhs, 1, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY %add + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %cmp_lhs:gpr(s64) = COPY $x0 + %cmp_rhs:gpr(s64) = COPY $x1 + %add_rhs:gpr(s64) = COPY $x2 + %cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs + %cmp_ext:gpr(s64) = G_ZEXT %cmp + %add:gpr(s64) = G_ADD %cmp_ext, %add_rhs + $x0 = COPY %add(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: cmp_add_rhs_64_zext_multi_use +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $x2 + + ; The ZExt is used more than once so don't fold. 
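+ ; Folding would delete the G_ZEXT, but %or below still needs %cmp_ext.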
+ ; CHECK-LABEL: name: cmp_add_rhs_64_zext_multi_use + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0 + ; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32 + ; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs + ; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext + ; CHECK-NEXT: $x0 = COPY %or + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %cmp_lhs:gpr(s64) = COPY $x0 + %cmp_rhs:gpr(s64) = COPY $x1 + %add_rhs:gpr(s64) = COPY $x2 + %cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs + %cmp_ext:gpr(s64) = G_ZEXT %cmp + %add:gpr(s64) = G_ADD %cmp_ext, %add_rhs + %or:gpr(s64) = G_OR %add, %cmp_ext + $x0 = COPY %or(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: cmp_add_rhs_64_cmp_multi_use +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $x2 + + ; The cmp is used more than once so don't fold. + ; CHECK-LABEL: name: cmp_add_rhs_64_cmp_multi_use + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0 + ; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32 + ; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs + ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], %cmp, %subreg.sub_32 + ; CHECK-NEXT: %cmp_ext2:gpr64 = SBFMXri [[INSERT_SUBREG]], 0, 31 + ; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext2 + ; CHECK-NEXT: $x0 = COPY %or + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %cmp_lhs:gpr(s64) = COPY $x0 + %cmp_rhs:gpr(s64) = COPY $x1 + %add_rhs:gpr(s64) = COPY $x2 + %cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs + %cmp_ext:gpr(s64) = G_ZEXT %cmp + %add:gpr(s64) = G_ADD %cmp_ext, %add_rhs + %cmp_ext2:gpr(s64) = G_SEXT %cmp + %or:gpr(s64) = G_OR %add, %cmp_ext2 + $x0 = COPY %or(s64) + RET_ReallyLR implicit $x0