Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -152,6 +152,10 @@ MachineInstr *emitVectorConcat(Optional Dst, Register Op1, Register Op2, MachineIRBuilder &MIRBuilder) const; + + // Emit an integer compare between LHS and RHS, which checks for Predicate. + // + // This may update Predicate when emitting the compare. MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; @@ -300,6 +304,10 @@ MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; + MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS, + MachineOperand &RHS, + MachineOperand &Predicate, + MachineIRBuilder &MIB) const; /// Return true if \p MI is a load or store of \p NumBytes bytes. 
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; @@ -460,6 +468,27 @@ } } +static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { + auto &MI = *Root.getParent(); + auto &MBB = *MI.getParent(); + auto &MF = *MBB.getParent(); + auto &MRI = MF.getRegInfo(); + uint64_t Immed; + if (Root.isImm()) + Immed = Root.getImm(); + else if (Root.isCImm()) + Immed = Root.getCImm()->getZExtValue(); + else if (Root.isReg()) { + auto ValAndVReg = + getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); + if (!ValAndVReg) + return None; + Immed = ValAndVReg->Value; + } else + return None; + return Immed; +} + /// Check whether \p I is a currently unsupported binary operation: /// - it has an unsized type /// - an operand is not a vreg @@ -1282,7 +1311,9 @@ if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB)) return false; - const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred); + // Predicate may have been updated by emitIntegerCompare. + auto NewPred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); + const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(NewPred); MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); I.eraseFromParent(); return true; @@ -3676,19 +3707,12 @@ } // Try to match immediate forms. - auto ImmFns = selectArithImmed(RHS); - if (ImmFns) - CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri; - - auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg()); - // If we matched a valid constant immediate, add those operands. - if (ImmFns) { - for (auto &RenderFn : *ImmFns) - RenderFn(CmpMI); - } else { - CmpMI.addUse(RHS.getReg()); - } - + MachineInstr *ImmedCmp = + tryOptArithImmedIntegerCompare(LHS, RHS, Predicate, MIRBuilder); + if (ImmedCmp) + return ImmedCmp; + auto CmpMI = + MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()}); // Make sure that we can constrain the compare that we emitted. 
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); return &*CmpMI; @@ -3863,13 +3887,16 @@ AArch64CC::CondCode CondCode; if (CondOpc == TargetOpcode::G_ICMP) { - CondCode = changeICMPPredToAArch64CC( - (CmpInst::Predicate)CondDef->getOperand(1).getPredicate()); if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), CondDef->getOperand(1), MIB)) { LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); return false; } + + // Have to collect the CondCode after emitIntegerCompare, since it can + // update the predicate. + CondCode = changeICMPPredToAArch64CC( + (CmpInst::Predicate)CondDef->getOperand(1).getPredicate()); } else { // Get the condition code for the select. AArch64CC::CondCode CondCode2; @@ -3999,6 +4026,119 @@ return nullptr; } +MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare( + MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, + MachineIRBuilder &MIB) const { + // Attempt to select the immediate form of an integer compare. + MachineRegisterInfo &MRI = *MIB.getMRI(); + auto Ty = MRI.getType(LHS.getReg()); + assert(!Ty.isVector() && "Expected scalar or pointer only?"); + unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && + "Expected 32 bit or 64 bit compare only?"); + auto P = (CmpInst::Predicate)Predicate.getPredicate(); + + // Check if this is a case we can already handle. + InstructionSelector::ComplexRendererFns ImmFns; + ImmFns = selectArithImmed(RHS); + + if (!ImmFns) { + // We didn't get a rendering function, but we may still have a constant. + auto MaybeImmed = getImmedFromMO(RHS); + if (!MaybeImmed) + return nullptr; + + // We have a constant, but it doesn't fit. Try adjusting it by one and + // updating the predicate if possible. 
+ uint64_t C = *MaybeImmed; + switch (P) { + default: + return nullptr; + case CmpInst::ICMP_SLT: + case CmpInst::ICMP_SGE: + // Check for + // + // x slt c => x sle c - 1 + // x sge c => x sgt c - 1 + // + // When c is not the smallest possible negative number. + if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) || + (Size == 32 && static_cast<int32_t>(C) == INT32_MIN)) + return nullptr; + P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; + C -= 1; + break; + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_UGE: + // Check for + // + // x ult c => x ule c - 1 + // x uge c => x ugt c - 1 + // + // When c is not zero. + if (C == 0) + return nullptr; + P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; + C -= 1; + break; + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_SGT: + // Check for + // + // x sle c => x slt c + 1 + // x sgt c => x sge c + 1 + // + // When c is not the largest possible signed integer. + if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) || + (Size == 64 && static_cast<int64_t>(C) == INT64_MAX)) + return nullptr; + P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; + C += 1; + break; + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_UGT: + // Check for + // + // x ule c => x ult c + 1 + // x ugt c => x uge c + 1 + // + // When c is not the largest possible unsigned integer. + if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) || + (Size == 64 && C == UINT64_MAX)) + return nullptr; + P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; + C += 1; + break; + } + + // Check if the new constant is valid. + if (Size == 32) + C = static_cast<uint32_t>(C); + ImmFns = select12BitValueWithLeftShift(C); + if (!ImmFns) + return nullptr; + Predicate.setPredicate(P); + } + + // At this point, we know we can select an immediate form. Go ahead and do + // that. 
+ Register ZReg; + unsigned Opc; + if (Size == 32) { + ZReg = AArch64::WZR; + Opc = AArch64::SUBSWri; + } else { + ZReg = AArch64::XZR; + Opc = AArch64::SUBSXri; + } + + auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()}); + for (auto &RenderFn : *ImmFns) + RenderFn(CmpMI); + constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); + return &*CmpMI; +} + bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const { // Try to match a vector splat operation into a dup instruction. // We're looking for this pattern: @@ -4521,27 +4661,6 @@ return false; } -static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { - auto &MI = *Root.getParent(); - auto &MBB = *MI.getParent(); - auto &MF = *MBB.getParent(); - auto &MRI = MF.getRegInfo(); - uint64_t Immed; - if (Root.isImm()) - Immed = Root.getImm(); - else if (Root.isCImm()) - Immed = Root.getCImm()->getZExtValue(); - else if (Root.isReg()) { - auto ValAndVReg = - getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); - if (!ValAndVReg) - return None; - Immed = ValAndVReg->Value; - } else - return None; - return Immed; -} - InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { auto MaybeImmed = getImmedFromMO(Root); Index: llvm/test/CodeGen/AArch64/GlobalISel/fold-select.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/fold-select.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/fold-select.mir @@ -21,9 +21,9 @@ ; CHECK: liveins: $w0, $w1 ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: [[MOVwzr:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr ; CHECK: $wzr = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVwzr]], [[COPY1]], 0, implicit $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY2]], [[COPY1]], 0, implicit $nzcv ; CHECK: $w0 = COPY 
[[CSELWr]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(s32) = COPY $w0 @@ -50,9 +50,9 @@ ; CHECK: liveins: $s0, $w0, $w1 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0 - ; CHECK: [[MOVwzr:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr ; CHECK: FCMPSri [[COPY1]], implicit-def $nzcv - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVwzr]], [[COPY]], 0, implicit $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY2]], [[COPY]], 0, implicit $nzcv ; CHECK: $w0 = COPY [[CSELWr]] ; CHECK: RET_ReallyLR implicit $w0 %1:gpr(s32) = COPY $w1 @@ -66,3 +66,34 @@ RET_ReallyLR implicit $w0 ... +--- +name: check_update_predicate +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; The G_ICMP is optimized here to be a slt comparison by adding 1 to the + ; constant. So, the CSELWr should use the predicate code 11, rather than + ; 13. + + ; CHECK-LABEL: name: check_update_predicate + ; CHECK: liveins: $w0, $w1 + ; CHECK: %copy1:gpr32sp = COPY $w0 + ; CHECK: %copy2:gpr32 = COPY $w1 + ; CHECK: %cst:gpr32 = MOVi32imm -1 + ; CHECK: $wzr = SUBSWri %copy1, 0, 0, implicit-def $nzcv + ; CHECK: %select:gpr32 = CSELWr %cst, %copy2, 11, implicit $nzcv + ; CHECK: $w0 = COPY %select + ; CHECK: RET_ReallyLR implicit $w0 + %copy1:gpr(s32) = COPY $w0 + %copy2:gpr(s32) = COPY $w1 + %cst:gpr(s32) = G_CONSTANT i32 -1 + %cmp:gpr(s32) = G_ICMP intpred(sle), %copy1(s32), %cst + %trunc:gpr(s1) = G_TRUNC %cmp(s32) + %select:gpr(s32) = G_SELECT %trunc(s1), %cst, %copy2 + $w0 = COPY %select(s32) + RET_ReallyLR implicit $w0 +... 
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir @@ -0,0 +1,630 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +name: slt_to_sle_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x slt c => x sle c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: slt_to_sle_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 4097 + %4:gpr(s32) = G_ICMP intpred(slt), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: slt_to_sle_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x slt c => x sle c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. 
+ + ; CHECK-LABEL: name: slt_to_sle_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4097 + %4:gpr(s32) = G_ICMP intpred(slt), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: sge_to_sgt_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x sge c => x sgt c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: sge_to_sgt_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 4097 + %4:gpr(s32) = G_ICMP intpred(sge), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: sge_to_sgt_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x sge c => x sgt c - 1 + ; + ; We should not have a MOV here. 
We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: sge_to_sgt_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4097 + %4:gpr(s32) = G_ICMP intpred(sge), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: ult_to_ule_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x ult c => x ule c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: ult_to_ule_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 8, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 4097 + %4:gpr(s32) = G_ICMP intpred(ult), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: ult_to_ule_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x ult c => x ule c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: ult_to_ule_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 8, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4097 + %4:gpr(s32) = G_ICMP intpred(ult), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: uge_to_ugt_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x uge c => x ugt c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. 
+ + ; CHECK-LABEL: name: uge_to_ugt_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 4097 + %4:gpr(s32) = G_ICMP intpred(uge), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: uge_to_ugt_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x uge c => x ugt c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: uge_to_ugt_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4097 + %4:gpr(s32) = G_ICMP intpred(uge), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: sle_to_slt_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x sle c => x slt c + 1 + ; + ; We should not have a MOV here. 
We can add 1 to the constant and change + ; the condition code. + ; + ; log_2(8192) == 13, so we can represent this as a 12 bit value with a + ; left shift. + ; + ; (We can't use 4095 here, because that's a legal arithmetic immediate.) + + ; CHECK-LABEL: name: sle_to_slt_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 2, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 8191 + %4:gpr(s32) = G_ICMP intpred(sle), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: sle_to_slt_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x sle c => x slt c + 1 + ; + ; We should not have a MOV here. We can add 1 to the constant and change + ; the condition code. + ; + ; log_2(8192) == 13, so we can represent this as a 12 bit value with a + ; left shift. 
+ + ; CHECK-LABEL: name: sle_to_slt_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 2, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 8191 + %4:gpr(s32) = G_ICMP intpred(sle), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: sgt_to_sge_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x sgt c => x sge c + 1 + ; + ; We should not have a MOV here. We can add 1 to the constant and change + ; the condition code. + ; + ; log_2(8192) == 13, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: sgt_to_sge_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 2, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 8191 + %4:gpr(s32) = G_ICMP intpred(sgt), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: sgt_to_sge_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x sgt c => x sge c + 1 + ; + ; We should not have a MOV here. 
We can add 1 to the constant and change + ; the condition code. + ; + ; log_2(8192) == 13, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: sgt_to_sge_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 2, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 8191 + %4:gpr(s32) = G_ICMP intpred(sgt), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: no_opt_int32_min +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; This one should contain a MOV. + ; + ; If we subtract 1 from the constant, it will wrap around, and so it's not + ; true that + ; + ; x slt c => x sle c - 1 + ; x sge c => x sgt c - 1 + + ; CHECK-LABEL: name: no_opt_int32_min + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm -2147483648 + ; CHECK: $wzr = SUBSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 -2147483648 + %4:gpr(s32) = G_ICMP intpred(slt), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: no_opt_int64_min +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; This one should contain a MOV. + ; + ; If we subtract 1 from the constant, it will wrap around, and so it's not + ; true that + ; + ; x slt c => x sle c - 1 + ; x sge c => x sgt c - 1 + + ; CHECK-LABEL: name: no_opt_int64_min + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm -9223372036854775808 + ; CHECK: $xzr = SUBSXrr [[COPY]], [[MOVi64imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 -9223372036854775808 + %4:gpr(s32) = G_ICMP intpred(slt), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: no_opt_int32_max +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; This one should contain a MOV. 
+ ; + ; If we add 1 to the constant, it will wrap around, and so it's not true + ; that + ; + ; x sle c => x slt c + 1 + ; x sgt c => x sge c + 1 + + ; CHECK-LABEL: name: no_opt_int32_max + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2147483647 + ; CHECK: $wzr = SUBSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 2147483647 + %4:gpr(s32) = G_ICMP intpred(sle), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: no_opt_int64_max +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; This one should contain a MOV. 
+ ; + ; If we add 1 to the constant, it will wrap around, and so it's not true + ; that + ; + ; x sle c => x slt c + 1 + ; x sgt c => x sge c + 1 + + + ; CHECK-LABEL: name: no_opt_int64_max + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 9223372036854775807 + ; CHECK: $xzr = SUBSXrr [[COPY]], [[MOVi64imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 9223372036854775807 + %4:gpr(s32) = G_ICMP intpred(sle), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: no_opt_zero +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; This one should contain a MOV. 
+ ; + ; This is an unsigned comparison, so when the constant is 0, the following + ; does not hold: + ; + ; x ult c => x ule c - 1 + ; x uge c => x ugt c - 1 + + ; CHECK-LABEL: name: no_opt_zero + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 0, 0, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 0 + %4:gpr(s32) = G_ICMP intpred(ult), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 +... Index: llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir @@ -1,15 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s ---- | - define void @cbz_s32() { ret void } - define void @cbz_s64() { ret void } - define void @cbnz_s32() { ret void } - define void @cbnz_s64() { ret void } - define hidden void @test_rhs_inttoptr(i64* %p) { ret void } - define hidden void @test_rhs_unknown(i64* %p) { ret void } -... 
- --- name: cbz_s32 legalized: true @@ -132,7 +123,7 @@ ; CHECK: CBZX [[COPY]], %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8 into %ir.p) + ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8) ; CHECK: bb.2: ; CHECK: RET_ReallyLR bb.1: @@ -148,7 +139,7 @@ bb.2: %5:gpr(s64) = G_CONSTANT i64 0 - G_STORE %5(s64), %0(p0) :: (store 8 into %ir.p) + G_STORE %5(s64), %0(p0) :: (store 8) bb.3: RET_ReallyLR @@ -166,12 +157,12 @@ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY]], 0 :: (load 8 from %ir.p) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY]], 0 :: (load 8) ; CHECK: $xzr = SUBSXri [[LDRXui]], 42, 0, implicit-def $nzcv ; CHECK: Bcc 0, %bb.2, implicit $nzcv ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8 into %ir.p) + ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8) ; CHECK: bb.2: ; CHECK: RET_ReallyLR bb.1: @@ -181,16 +172,47 @@ %0:gpr(p0) = COPY $x0 %2:gpr(s64) = G_CONSTANT i64 42 %4:gpr(s64) = G_CONSTANT i64 0 - %1:gpr(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p) + %1:gpr(s64) = G_LOAD %0(p0) :: (load 8) %5:gpr(s32) = G_ICMP intpred(eq), %1(s64), %2 %3:gpr(s1) = G_TRUNC %5(s32) G_BRCOND %3(s1), %bb.3 bb.2: %6:gpr(s64) = G_CONSTANT i64 0 - G_STORE %6(s64), %0(p0) :: (store 8 into %ir.p) + G_STORE %6(s64), %0(p0) :: (store 8) bb.3: RET_ReallyLR ... +--- +name: update_pred_minus_one +legalized: true +regBankSelected: true + +body: | + ; The G_ICMP here will be optimized into a slt against 0. + ; The branch should inherit this change, so we should have Bcc 11 rather than + ; Bcc 13. 
+ + ; CHECK-LABEL: name: update_pred_minus_one + ; CHECK: bb.0: + ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv + ; CHECK: Bcc 11, %bb.1, implicit $nzcv + ; CHECK: B %bb.0 + ; CHECK: bb.1: + bb.0: + liveins: $w0 + successors: %bb.0, %bb.1 + + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 -1 + %2:gpr(s32) = G_ICMP intpred(sle), %0, %1 + %3:gpr(s1) = G_TRUNC %2(s32) + G_BRCOND %3(s1), %bb.1 + G_BR %bb.0 + + bb.1: +... Index: llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir @@ -99,10 +99,9 @@ ; CHECK: bb.0: ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: %copy:gpr64 = COPY $x0 - ; CHECK: %negative_one:gpr64 = MOVi64imm -1 - ; CHECK: %and:gpr64common = ANDXri %copy, 8000 - ; CHECK: $xzr = SUBSXrr %and, %negative_one, implicit-def $nzcv - ; CHECK: Bcc 12, %bb.1, implicit $nzcv + ; CHECK: %and:gpr64sp = ANDXri %copy, 8000 + ; CHECK: $xzr = SUBSXri %and, 0, 0, implicit-def $nzcv + ; CHECK: Bcc 10, %bb.1, implicit $nzcv ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: RET_ReallyLR