diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -155,7 +155,9 @@
   // Emit an integer compare between LHS and RHS, which checks for Predicate.
   //
-  // This may update Predicate when emitting the compare.
+  // This returns the produced compare instruction, and the predicate which
+  // was ultimately used in the compare. The predicate may differ from what
+  // is passed in \p Predicate due to optimization.
   std::pair<MachineInstr *, CmpInst::Predicate>
   emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                      MachineOperand &Predicate,
                      MachineIRBuilder &MIRBuilder) const;
@@ -307,7 +309,7 @@
                                       MachineIRBuilder &MIRBuilder) const;
   MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
                                                MachineOperand &RHS,
-                                               MachineOperand &Predicate,
+                                               CmpInst::Predicate &Predicate,
                                                MachineIRBuilder &MIB) const;
   MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
                                           MachineOperand &RHS,
@@ -3685,13 +3687,16 @@
     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
     MachineIRBuilder &MIRBuilder) const {
   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
+  assert(Predicate.isPredicate() && "Expected predicate?");
   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
 
+  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
+
   // Fold the compare if possible.
   MachineInstr *FoldCmp =
       tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
   if (FoldCmp)
-    return {FoldCmp, (CmpInst::Predicate)Predicate.getPredicate()};
+    return {FoldCmp, P};
 
   // Can't fold into a CMN. Just emit a normal compare.
   unsigned CmpOpc = 0;
@@ -3712,21 +3717,21 @@
 
   // Try to match immediate forms.
   MachineInstr *ImmedCmp =
-      tryOptArithImmedIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
+      tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
   if (ImmedCmp)
-    return {ImmedCmp, (CmpInst::Predicate)Predicate.getPredicate()};
+    return {ImmedCmp, P};
 
   // If we don't have an immediate, we may have a shift which can be folded
   // into the compare.
   MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
   if (ShiftedCmp)
-    return {ShiftedCmp, (CmpInst::Predicate)Predicate.getPredicate()};
+    return {ShiftedCmp, P};
 
   auto CmpMI =
       MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
   // Make sure that we can constrain the compare that we emitted.
   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
-  return {&*CmpMI, (CmpInst::Predicate)Predicate.getPredicate()};
+  return {&*CmpMI, P};
 }
 
 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
@@ -4042,7 +4047,7 @@
 }
 
 MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
-    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
+    MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
     MachineIRBuilder &MIB) const {
   // Attempt to select the immediate form of an integer compare.
   MachineRegisterInfo &MRI = *MIB.getMRI();
@@ -4051,7 +4056,6 @@
   unsigned Size = Ty.getSizeInBits();
   assert((Size == 32 || Size == 64) &&
          "Expected 32 bit or 64 bit compare only?");
-  auto P = (CmpInst::Predicate)Predicate.getPredicate();
 
   // Check if this is a case we can already handle.
   InstructionSelector::ComplexRendererFns ImmFns;
@@ -4066,6 +4070,7 @@
     // We have a constant, but it doesn't fit. Try adjusting it by one and
     // updating the predicate if possible.
     uint64_t C = *MaybeImmed;
+    CmpInst::Predicate NewP;
     switch (P) {
     default:
       return nullptr;
@@ -4080,7 +4085,7 @@
       if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
           (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
         return nullptr;
-      P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
+      NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
       C -= 1;
       break;
     case CmpInst::ICMP_ULT:
@@ -4093,7 +4098,7 @@
       // When c is not zero.
       if (C == 0)
         return nullptr;
-      P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+      NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
       C -= 1;
       break;
     case CmpInst::ICMP_SLE:
@@ -4107,7 +4112,7 @@
       if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
          (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
        return nullptr;
-      P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
+      NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
       C += 1;
       break;
     case CmpInst::ICMP_ULE:
@@ -4121,7 +4126,7 @@
       if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
           (Size == 64 && C == UINT64_MAX))
         return nullptr;
-      P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
+      NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
       C += 1;
       break;
     }
@@ -4132,7 +4137,7 @@
     ImmFns = select12BitValueWithLeftShift(C);
     if (!ImmFns)
       return nullptr;
-    Predicate.setPredicate(P);
+    P = NewP;
   }
 
   // At this point, we know we can select an immediate form. Go ahead and do
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
@@ -627,4 +627,82 @@
     %3:gpr(s64) = G_AND %6, %5
     $x0 = COPY %3(s64)
     RET_ReallyLR implicit $x0
+
+...
+---
+name:            more_than_one_use_select
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2
+
+    ; Both of these selects use the same compare.
+    ;
+    ; They should both be optimized in the same way, so the SUBS produced for
+    ; each CSEL should be the same.
+
+    ; CHECK-LABEL: name: more_than_one_use_select
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %a:gpr64common = COPY $x0
+    ; CHECK: %b:gpr64 = COPY $x1
+    ; CHECK: %c:gpr64 = COPY $x2
+    ; CHECK: $xzr = SUBSXri %a, 0, 0, implicit-def $nzcv
+    ; CHECK: %select1:gpr64 = CSELXr %a, %b, 11, implicit $nzcv
+    ; CHECK: $xzr = SUBSXri %a, 0, 0, implicit-def $nzcv
+    ; CHECK: %select2:gpr64 = CSELXr %b, %c, 11, implicit $nzcv
+    ; CHECK: %add:gpr64 = ADDXrr %select1, %select2
+    ; CHECK: $x0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $x0
+    %a:gpr(s64) = COPY $x0
+    %b:gpr(s64) = COPY $x1
+    %c:gpr(s64) = COPY $x2
+    %cst:gpr(s64) = G_CONSTANT i64 -1
+    %cmp:gpr(s32) = G_ICMP intpred(sle), %a(s64), %cst
+    %trunc_cmp:gpr(s1) = G_TRUNC %cmp(s32)
+    %select1:gpr(s64) = G_SELECT %trunc_cmp(s1), %a, %b
+    %select2:gpr(s64) = G_SELECT %trunc_cmp(s1), %b, %c
+    %add:gpr(s64) = G_ADD %select1, %select2
+    $x0 = COPY %add(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            more_than_one_use_select_no_opt
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2
+
+    ; When we don't end up doing the optimization, we should not change the
+    ; predicate.
+    ;
+    ; In this case, the CSELXrs should both have predicate code 13.
+
+    ; CHECK-LABEL: name: more_than_one_use_select_no_opt
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %a:gpr64 = COPY $x0
+    ; CHECK: %b:gpr64 = COPY $x1
+    ; CHECK: %c:gpr64 = COPY $x2
+    ; CHECK: %cst:gpr64 = MOVi64imm 922337203685477580
+    ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %a, %cst, implicit-def $nzcv
+    ; CHECK: %select1:gpr64 = CSELXr %a, %b, 13, implicit $nzcv
+    ; CHECK: [[SUBSXrr1:%[0-9]+]]:gpr64 = SUBSXrr %a, %cst, implicit-def $nzcv
+    ; CHECK: %select2:gpr64 = CSELXr %b, %c, 13, implicit $nzcv
+    ; CHECK: %add:gpr64 = ADDXrr %select1, %select2
+    ; CHECK: $x0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $x0
+    %a:gpr(s64) = COPY $x0
+    %b:gpr(s64) = COPY $x1
+    %c:gpr(s64) = COPY $x2
+    %cst:gpr(s64) = G_CONSTANT i64 922337203685477580
+    %cmp:gpr(s32) = G_ICMP intpred(sle), %a(s64), %cst
+    %trunc_cmp:gpr(s1) = G_TRUNC %cmp(s32)
+    %select1:gpr(s64) = G_SELECT %trunc_cmp(s1), %a, %b
+    %select2:gpr(s64) = G_SELECT %trunc_cmp(s1), %b, %c
+    %add:gpr(s64) = G_ADD %select1, %select2
+    $x0 = COPY %add(s64)
+    RET_ReallyLR implicit $x0
 ...
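Note for readers unfamiliar with the transformation this patch preserves: below is a minimal standalone sketch (plain C++, not the selector code itself) of the constant-adjustment idea behind tryOptArithImmedIntegerCompare, restricted to 64-bit compares for brevity. The Pred enum and the fitsArithImmediate() helper are invented for the example; in the selector the encodability check is select12BitValueWithLeftShift().

// Sketch only: when an immediate C is not encodable as an AArch64 arithmetic
// immediate, try the equivalent compare against C-1 or C+1 with an adjusted
// predicate, and only accept the rewrite if the new constant is encodable.
#include <cstdint>
#include <optional>
#include <utility>

enum class Pred { SLT, SGE, ULT, UGE, SLE, SGT, ULE, UGT };

// ADDS/SUBS immediates are 12-bit unsigned values, optionally shifted left
// by 12 bits (this mirrors what select12BitValueWithLeftShift accepts).
static bool fitsArithImmediate(uint64_t C) {
  return C < 4096 || ((C & 0xfff) == 0 && (C >> 12) < 4096);
}

// Equivalences used below (64-bit only):
//   x <s C  <=>  x <=s C-1     x >=s C  <=>  x >s  C-1   (C != INT64_MIN)
//   x <u C  <=>  x <=u C-1     x >=u C  <=>  x >u  C-1   (C != 0)
//   x <=s C <=>  x <s  C+1     x >s  C  <=>  x >=s C+1   (C != INT64_MAX)
//   x <=u C <=>  x <u  C+1     x >u  C  <=>  x >=u C+1   (C != UINT64_MAX)
// Returns the (possibly adjusted) predicate and constant, or std::nullopt if
// no encodable form exists. Equality predicates are not handled here.
static std::optional<std::pair<Pred, uint64_t>> adjustCompare(Pred P,
                                                              uint64_t C) {
  if (fitsArithImmediate(C))
    return std::make_pair(P, C);
  switch (P) {
  case Pred::SLT:
  case Pred::SGE:
    if (static_cast<int64_t>(C) == INT64_MIN)
      return std::nullopt;
    P = (P == Pred::SLT) ? Pred::SLE : Pred::SGT;
    C -= 1;
    break;
  case Pred::ULT:
  case Pred::UGE:
    if (C == 0)
      return std::nullopt;
    P = (P == Pred::ULT) ? Pred::ULE : Pred::UGT;
    C -= 1;
    break;
  case Pred::SLE:
  case Pred::SGT:
    if (static_cast<int64_t>(C) == INT64_MAX)
      return std::nullopt;
    P = (P == Pred::SLE) ? Pred::SLT : Pred::SGE;
    C += 1;
    break;
  case Pred::ULE:
  case Pred::UGT:
    if (C == UINT64_MAX)
      return std::nullopt;
    P = (P == Pred::ULE) ? Pred::ULT : Pred::UGE;
    C += 1;
    break;
  }
  if (!fitsArithImmediate(C))
    return std::nullopt;
  return std::make_pair(P, C);
}

The point the patch fixes is visible in this framing as well: the adjusted predicate is returned to the caller rather than written back into the G_ICMP's predicate operand, so a compare with more than one user (as in the new MIR tests) is selected consistently for every user.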