Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -85,6 +85,7 @@ FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. + OpaqueSUB, // Subtract that DAG combiner should ignore. SSAT, // Signed saturation USAT, // Unsigned saturation Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1281,6 +1281,7 @@ case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::OpaqueSUB: return "ARMISD::OpaqueSUB"; case ARMISD::SSAT: return "ARMISD::SSAT"; case ARMISD::USAT: return "ARMISD::USAT"; @@ -12720,21 +12721,21 @@ DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1)); Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry); } - } else if (CC == ARMCC::NE && LHS != RHS && + } else if (CC == ARMCC::NE && !isNullConstant(RHS) && (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) { // This seems pointless but will allow us to combine it further below. // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y) - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + SDValue Sub = DAG.getNode(ARMISD::OpaqueSUB, dl, VT, LHS, RHS); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc, N->getOperand(3), Cmp); } } else if (isNullConstant(TrueVal)) { - if (CC == ARMCC::EQ && LHS != RHS && + if (CC == ARMCC::EQ && !isNullConstant(RHS) && (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) { // This seems pointless but will allow us to combine it further below // Note that we change == for != as this is the dual for the case above. 
// CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y) - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + SDValue Sub = DAG.getNode(ARMISD::OpaqueSUB, dl, VT, LHS, RHS); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal, DAG.getConstant(ARMCC::NE, dl, MVT::i32), N->getOperand(3), Cmp); @@ -12751,8 +12752,8 @@ // t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ] const APInt *TrueConst; if (Subtarget->isThumb1Only() && CC == ARMCC::NE && - (FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) && - (FalseVal.getOperand(1) == RHS) && + (FalseVal.getOpcode() == ARMISD::OpaqueSUB) && + (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) && (TrueConst = isPowerOf2Constant(TrueVal))) { SDVTList VTs = DAG.getVTList(VT, MVT::i32); unsigned ShiftAmount = TrueConst->logBase2(); Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ -144,6 +144,7 @@ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; +def ARMopaquesub : SDNode<"ARMISD::OpaqueSUB", SDTIntBinOp>; def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; @@ -3622,6 +3623,14 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, sub>; + +def : ARMPat<(ARMopaquesub GPR:$Rn, mod_imm:$imm), (SUBri $Rn, mod_imm:$imm)>; +def : ARMPat<(ARMopaquesub GPR:$Rn, GPR:$Rm), (SUBrr $Rn, $Rm)>; +def : ARMPat<(ARMopaquesub GPR:$Rn, so_reg_imm:$shift), + (SUBrsi $Rn, so_reg_imm:$shift)>; +def : ARMPat<(ARMopaquesub GPR:$Rn, so_reg_reg:$shift), + (SUBrsr $Rn, so_reg_reg:$shift)>; + // ADD and SUB with 's' bit set. 
// // Currently, ADDS/SUBS are pseudo opcodes that exist only in the Index: llvm/lib/Target/ARM/ARMInstrThumb.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb.td +++ llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1282,6 +1282,8 @@ bits<3> imm3; let Inst{8-6} = imm3; } +def : T1Pat<(ARMopaquesub tGPR:$Rn, imm0_7:$imm3), + (tSUBi3 $Rn, imm0_7:$imm3)>; def tSUBi8 : // A8.6.210 T2 T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), @@ -1289,6 +1291,8 @@ "sub", "\t$Rdn, $imm8", [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>, Sched<[WriteALU]>; +def : T1Pat<(ARMopaquesub tGPR:$Rn, imm0_255:$imm8), + (tSUBi8 $Rn, imm0_255:$imm8)>; def : tInstSubst<"add${s}${p} $rd, $rn, $imm", (tSUBi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>; @@ -1305,6 +1309,7 @@ "sub", "\t$Rd, $Rn, $Rm", [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; +def : T1Pat<(ARMopaquesub tGPR:$Rn, tGPR:$Rm), (tSUBrr $Rn, $Rm)>; def : tInstAlias <"sub${s}${p} $Rdn, $Rm", (tSUBrr tGPR:$Rdn,s_cc_out:$s, tGPR:$Rdn, tGPR:$Rm, pred:$p)>; Index: llvm/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2081,6 +2081,14 @@ defm t2ADD : T2I_bin_ii12rs<0b000, "add", add, 1>; defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, t2_so_imm:$imm), + (t2SUBri $Rn, t2_so_imm:$imm)>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, imm0_4095:$imm), + (t2SUBri12 $Rn, imm0_4095:$imm)>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, rGPR:$Rm), (t2SUBrr $Rn, $Rm)>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, t2_so_reg:$ShiftedRm), + (t2SUBrs $Rn, t2_so_reg:$ShiftedRm)>; + // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants. 
// // Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the Index: llvm/test/CodeGen/ARM/select.ll =================================================================== --- llvm/test/CodeGen/ARM/select.ll +++ llvm/test/CodeGen/ARM/select.ll @@ -142,3 +142,17 @@ ret float %2 } +; N.B. the sub here is redundant with the cmp; it is fine if a later peephole +; realises this and removes the cmp in favour of a subs. +; CHECK-LABEL: test_overflow_recombine: +define i1 @test_overflow_recombine(i32 %in) { +; CHECK: smull [[LO:r[0-9]+]], [[HI:r[0-9]+]] +; CHECK: sub [[ZERO:r[0-9]+]], [[HI]], [[LO]], asr #31 +; CHECK: cmp [[HI]], [[LO]], asr #31 +; CHECK: movne [[ZERO]], #1 + %prod = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 0, i32 %in) + %overflow = extractvalue { i32, i1 } %prod, 1 + ret i1 %overflow +} + +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)