Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -85,6 +85,7 @@ FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. + OpaqueSUBS, // Subtract that DAG combiner should ignore. SSAT, // Signed saturation USAT, // Unsigned saturation Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1282,6 +1282,7 @@ case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::OpaqueSUBS: return "ARMISD::OpaqueSUBS"; case ARMISD::SSAT: return "ARMISD::SSAT"; case ARMISD::USAT: return "ARMISD::USAT"; @@ -12663,30 +12664,38 @@ DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1)); Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry); } - } else if (CC == ARMCC::NE && LHS != RHS && + } else if (CC == ARMCC::NE && !isNullConstant(RHS) && (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) { // This seems pointless but will allow us to combine it further below. - // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y) - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1 + SDValue Sub = DAG.getNode(ARMISD::OpaqueSUBS, dl, + DAG.getVTList(VT, MVT::i32), LHS, RHS); + SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, + Sub.getValue(1), SDValue()); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc, - N->getOperand(3), Cmp); + N->getOperand(3), CPSRGlue.getValue(1)); + FalseVal = Sub; } } else if (isNullConstant(TrueVal)) { - if (CC == ARMCC::EQ && LHS != RHS && + if (CC == ARMCC::EQ && !isNullConstant(RHS) && (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) { // This seems pointless but will allow us to combine it further below // Note that we change == for != as this is the dual for the case above. - // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y) - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); + // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1 + SDValue Sub = DAG.getNode(ARMISD::OpaqueSUBS, dl, + DAG.getVTList(VT, MVT::i32), LHS, RHS); + SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, + Sub.getValue(1), SDValue()); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal, DAG.getConstant(ARMCC::NE, dl, MVT::i32), - N->getOperand(3), Cmp); + N->getOperand(3), CPSRGlue.getValue(1)); + FalseVal = Sub; } } // On Thumb1, the DAG above may be further combined if z is a power of 2 // (z == 2 ^ K). - // CMOV (SUB x, y), z, !=, (CMPZ x, y) -> + // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 -> // merge t3, t4 // where t1 = (SUBCARRY (SUB x, y), z, 0) // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1) @@ -12694,8 +12703,8 @@ // t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ] const APInt *TrueConst; if (Subtarget->isThumb1Only() && CC == ARMCC::NE && - (FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) && - (FalseVal.getOperand(1) == RHS) && + (FalseVal.getOpcode() == ARMISD::OpaqueSUBS) && + (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) && (TrueConst = isPowerOf2Constant(TrueVal))) { SDVTList VTs = DAG.getVTList(VT, MVT::i32); unsigned ShiftAmount = TrueConst->logBase2(); Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ -144,6 +144,7 @@ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; +def ARMopaquesub : SDNode<"ARMISD::OpaqueSUBS", SDTIntBinOp, [SDNPOutGlue]>; def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; @@ -3627,6 +3628,7 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, sub>; + // ADD and SUB with 's' bit set. // // Currently, ADDS/SUBS are pseudo opcodes that exist only in the @@ -3641,6 +3643,14 @@ defm ADDS : AsI1_bin_s_irs; defm SUBS : AsI1_bin_s_irs; +def : ARMPat<(ARMopaquesub GPR:$Rn, mod_imm:$imm), (SUBSri $Rn, mod_imm:$imm)>; +def : ARMPat<(ARMopaquesub GPR:$Rn, GPR:$Rm), (SUBSrr $Rn, $Rm)>; +def : ARMPat<(ARMopaquesub GPR:$Rn, so_reg_imm:$shift), + (SUBSrsi $Rn, so_reg_imm:$shift)>; +def : ARMPat<(ARMopaquesub GPR:$Rn, so_reg_reg:$shift), + (SUBSrsr $Rn, so_reg_reg:$shift)>; + + let isAdd = 1 in defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>; Index: llvm/lib/Target/ARM/ARMInstrThumb.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb.td +++ llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1282,7 +1282,6 @@ bits<3> imm3; let Inst{8-6} = imm3; } - def tSUBi8 : // A8.6.210 T2 T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, @@ -1351,6 +1350,14 @@ Sched<[WriteALU]>; } + +def : T1Pat<(ARMopaquesub tGPR:$Rn, tGPR:$Rm), (tSUBSrr $Rn, $Rm)>; +def : T1Pat<(ARMopaquesub tGPR:$Rn, imm0_7:$imm3), + (tSUBSi3 $Rn, imm0_7:$imm3)>; +def : T1Pat<(ARMopaquesub tGPR:$Rn, imm0_255:$imm8), + (tSUBSi8 $Rn, imm0_255:$imm8)>; + + // Sign-extend byte def tSXTB : // A8.6.222 T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), Index: llvm/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2081,6 +2081,7 @@ defm t2ADD : T2I_bin_ii12rs<0b000, "add", add, 1>; defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>; + // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants. // // Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the @@ -2094,6 +2095,12 @@ defm t2ADDS : T2I_bin_s_irs ; defm t2SUBS : T2I_bin_s_irs ; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, t2_so_imm:$imm), + (t2SUBSri $Rn, t2_so_imm:$imm)>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, rGPR:$Rm), (t2SUBSrr $Rn, $Rm)>; +def : T2Pat<(ARMopaquesub GPRnopc:$Rn, t2_so_reg:$ShiftedRm), + (t2SUBSrs $Rn, t2_so_reg:$ShiftedRm)>; + let hasPostISelHook = 1 in { defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>; Index: llvm/test/CodeGen/ARM/CGP/arm-cgp-casts.ll =================================================================== --- llvm/test/CodeGen/ARM/CGP/arm-cgp-casts.ll +++ llvm/test/CodeGen/ARM/CGP/arm-cgp-casts.ll @@ -104,9 +104,7 @@ ; CHECK-COMMON-LABEL: or_icmp_ugt: ; CHECK-COMMON: ldrb -; CHECK-COMMON: sub.w -; CHECK-COMMON-NOT: uxt -; CHECK-COMMON: cmp.w +; CHECK-COMMON: subs.w ; CHECK-COMMON-NOT: uxt ; CHECK-COMMON: cmp define i1 @or_icmp_ugt(i32 %arg, i8* %ptr) { Index: llvm/test/CodeGen/ARM/cmp.ll =================================================================== --- llvm/test/CodeGen/ARM/cmp.ll +++ llvm/test/CodeGen/ARM/cmp.ll @@ -39,15 +39,11 @@ define i1 @f7(i32 %a, i32 %b) { ; CHECK-LABEL: f7: -; CHECK: sub r2, r0, r1, lsr #6 -; CHECK: cmp r0, r1, lsr #6 -; CHECK: movwne r2, #1 -; CHECK: mov r0, r2 -; CHECK-T2: sub.w r2, r0, r1, lsr #6 -; CHECK-T2: cmp.w r0, r1, lsr #6 +; CHECK: subs r0, r0, r1, lsr #6 +; CHECK: movwne r0, #1 +; CHECK-T2: subs.w r0, r0, r1, lsr #6 ; CHECK-T2: it ne -; CHECK-T2: movne r2, #1 -; CHECK-T2: mov r0, r2 +; CHECK-T2: movne r0, #1 %tmp = lshr i32 %b, 6 %tmp1 = icmp ne i32 %a, %tmp ret i1 %tmp1 @@ -68,15 +64,11 @@ define i1 @f9(i32 %a) { ; CHECK-LABEL: f9: -; CHECK: sub r1, r0, r0, ror #8 -; CHECK: cmp r0, r0, ror #8 -; CHECK: movwne r1, #1 -; CHECK: mov r0, r1 -; CHECK-T2: sub.w r1, r0, r0, ror #8 -; CHECK-T2: cmp.w r0, r0, ror #8 +; CHECK: subs r0, r0, r0, ror #8 +; CHECK: movwne r0, #1 +; CHECK-T2: subs.w r0, r0, r0, ror #8 ; CHECK-T2: it ne -; CHECK-T2: movne r1, #1 -; CHECK-T2: mov r0, r1 +; CHECK-T2: movne r0, #1 %l8 = shl i32 %a, 24 %r8 = lshr i32 %a, 8 %tmp = or i32 %l8, %r8 Index: llvm/test/CodeGen/ARM/select.ll =================================================================== --- llvm/test/CodeGen/ARM/select.ll +++ llvm/test/CodeGen/ARM/select.ll @@ -142,3 +142,14 @@ ret float %2 } +; CHECK-LABEL: test_overflow_recombine: +define i1 @test_overflow_recombine(i32 %in) { +; CHECK: smull [[LO:r[0-9]+]], [[HI:r[0-9]+]] +; CHECK: subs [[ZERO:r[0-9]+]], [[HI]], [[LO]], asr #31 +; CHECK: movne [[ZERO]], #1 + %prod = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 0, i32 %in) + %overflow = extractvalue { i32, i1 } %prod, 1 + ret i1 %overflow +} + +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)