Thumb1 selection of add/subtract with carry (ADDC, ADDE, SUBC, SUBE).

Summary of the changes in this patch:

  lib/Target/ARM/ARMISelDAGToDAG.cpp
      The ARMISD::SUBE case (SMMLS matching, guarded by hasV6Ops) is moved and
      rewritten with positive conditions so that it falls through into a new
      ARMISD::ADDE case when SMMLS is not matched.  For Thumb1-only subtargets
      ADDE/SUBE are selected by hand to tADC/tSBC, with the carry copied into
      CPSR and passed through glue; a negative constant RHS flips the opcode
      and uses the bitwise-not of the immediate.  ADDC/SUBC are selected by
      hand to tADDi3/tADDi8/tADDrr or tSUBi3/tSUBi8/tSUBrr, again flipping the
      opcode for negative immediates.  All other subtargets break out to the
      autogenerated matcher.

  lib/Target/ARM/ARMISelLowering.cpp
      ISD::ADDC/ADDE/SUBC/SUBE on i32 are marked Custom unconditionally; the
      former !isThumb1Only() guard and its FIXME are removed.

  lib/Target/ARM/ARMInstrThumb.td
      The adde/sube selection patterns on tADC/tSBC and the T1Pat patterns for
      addc/subc are removed.

  test/CodeGen/Thumb/long.ll
      Adds a thumbv6-eabi RUN line (CHECK-V6 prefix), replaces the loose
      adc/sbc/mvn checks with exact instruction sequences, and adds the
      f6a, f6b, f9a, f9b, f9c and f9d test functions.

NOTE(review): leading whitespace inside the hunks below is reconstructed and
may not match the target files byte for byte.  If a hunk is rejected, retry
with whitespace-insensitive matching (for example GNU patch -l).

Index: lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3164,37 +3164,6 @@
       return;
     }
   }
-  case ARMISD::SUBE: {
-    if (!Subtarget->hasV6Ops())
-      break;
-    // Look for a pattern to match SMMLS
-    // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
-    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
-        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
-        !SDValue(N, 1).use_empty())
-      break;
-
-    if (Subtarget->isThumb())
-      assert(Subtarget->hasThumb2() &&
-             "This pattern should not be generated for Thumb");
-
-    SDValue SmulLoHi = N->getOperand(1);
-    SDValue Subc = N->getOperand(2);
-    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
-
-    if (!Zero || Zero->getZExtValue() != 0 ||
-        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
-        N->getOperand(1) != SmulLoHi.getValue(1) ||
-        N->getOperand(2) != Subc.getValue(1))
-      break;
-
-    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
-    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
-                      N->getOperand(0), getAL(CurDAG, dl),
-                      CurDAG->getRegister(0, MVT::i32) };
-    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
-    return;
-  }
   case ISD::LOAD: {
     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
       if (tryT2IndexedLoad(N))
@@ -3266,6 +3235,110 @@
     return;
   }
 
+
+  case ARMISD::SUBE:
+    if (Subtarget->hasV6Ops()) {
+
+      // Look for a pattern to match SMMLS
+      // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
+      if (N->getOperand(1).getOpcode() == ISD::SMUL_LOHI &&
+          N->getOperand(2).getOpcode() == ARMISD::SUBC &&
+          SDValue(N, 1).use_empty())
+      {
+
+        if (Subtarget->isThumb())
+          assert(Subtarget->hasThumb2() &&
+                 "This pattern should not be generated for Thumb");
+
+        SDValue SmulLoHi = N->getOperand(1);
+        SDValue Subc = N->getOperand(2);
+        auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
+
+        if (Zero && Zero->getZExtValue() == 0 &&
+            Subc.getOperand(1) == SmulLoHi.getValue(0) &&
+            N->getOperand(1) == SmulLoHi.getValue(1) &&
+            N->getOperand(2) == Subc.getValue(1))
+        {
+
+          unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
+          SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
+                            N->getOperand(0), getAL(CurDAG, dl),
+                            CurDAG->getRegister(0, MVT::i32) };
+          ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
+          return;
+        }
+      }
+    }
+    LLVM_FALLTHROUGH;
+
+  case ARMISD::ADDE:
+    if (Subtarget->isThumb1Only()) {
+      bool isAdd = N->getOpcode() == ARMISD::ADDE;
+      SDValue RHS = N->getOperand(1);
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+        int64_t imm = C->getSExtValue();
+        if (imm < 0) {
+          isAdd = !isAdd;
+          // The with-carry-in form matches bitwise not instead of the negation.
+          // Effectively, the inverse interpretation of the carry flag already
+          // accounts for part of the negation.
+          RHS = CurDAG->getConstant(~imm, dl, MVT::i32);
+          Select(RHS.getNode());
+        }
+      }
+      unsigned Opc = isAdd ? ARM::tADC : ARM::tSBC;
+      SDValue Carry = N->getOperand(2),
+              GlueIn = CurDAG->getCopyToReg(Carry, dl, ARM::CPSR,
+                                            Carry, SDValue()).getValue(1),
+              Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
+                       N->getOperand(0), RHS, getAL(CurDAG, dl),
+                       CurDAG->getRegister(0, MVT::i32), GlueIn},
+              Res(CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Glue, Ops), 0),
+              GlueOut = CurDAG->getCopyFromReg(Res, dl, ARM::CPSR,
+                                               MVT::i32, Res.getValue(1)),
+              Replacement[] = {Res, GlueOut};
+      CurDAG->ReplaceAllUsesWith(N, Replacement);
+      CurDAG->RemoveDeadNode(N);
+      return;
+    }
+    // Other cases are autogenerated.
+    break;
+
+  case ARMISD::ADDC:
+  case ARMISD::SUBC:
+    if (Subtarget->isThumb1Only()) {
+      bool isAdd = N->getOpcode() == ARMISD::ADDC;
+      unsigned Opc = isAdd ? ARM::tADDrr : ARM::tSUBrr;
+      SDValue RHS = N->getOperand(1);
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+        int64_t imm = C->getSExtValue();
+        if (imm < 0) {
+          imm = -imm;
+          isAdd = !isAdd;
+        }
+        if (imm < 256) {
+          if (imm < 8)
+            Opc = isAdd ? ARM::tADDi3 : ARM::tSUBi3;
+          else
+            Opc = isAdd ? ARM::tADDi8 : ARM::tSUBi8;
+          RHS = CurDAG->getTargetConstant(imm, dl, MVT::i32);
+        }
+      }
+
+      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
+                       N->getOperand(0), RHS,
+                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)},
+              Res(CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Glue, Ops), 0),
+              Glue = CurDAG->getCopyFromReg(Res, dl, ARM::CPSR,
+                                            MVT::i32, Res.getValue(1)),
+              Replacement[] = {Res, Glue};
+      CurDAG->ReplaceAllUsesWith(N, Replacement);
+      CurDAG->RemoveDeadNode(N);
+      return;
+    }
+    // Other cases are autogenerated.
+    break;
+
   case ARMISD::CMPZ: {
     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
     // This allows us to avoid materializing the expensive negative constant.
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -826,13 +826,10 @@
   setOperationAction(ISD::SRL, MVT::i64, Custom);
   setOperationAction(ISD::SRA, MVT::i64, Custom);
 
-  if (!Subtarget->isThumb1Only()) {
-    // FIXME: We should do this for Thumb1 as well.
-    setOperationAction(ISD::ADDC, MVT::i32, Custom);
-    setOperationAction(ISD::ADDE, MVT::i32, Custom);
-    setOperationAction(ISD::SUBC, MVT::i32, Custom);
-    setOperationAction(ISD::SUBE, MVT::i32, Custom);
-  }
+  setOperationAction(ISD::ADDC, MVT::i32, Custom);
+  setOperationAction(ISD::ADDE, MVT::i32, Custom);
+  setOperationAction(ISD::SUBC, MVT::i32, Custom);
+  setOperationAction(ISD::SUBE, MVT::i32, Custom);
 
   if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
     setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
Index: lib/Target/ARM/ARMInstrThumb.td
===================================================================
--- lib/Target/ARM/ARMInstrThumb.td
+++ lib/Target/ARM/ARMInstrThumb.td
@@ -910,7 +910,7 @@
   def tADC : // A8.6.2
     T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
                   "adc", "\t$Rdn, $Rm",
-                  [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+                  []>, Sched<[WriteALU]>;
 
 // Add immediate
 def tADDi3 : // A8.6.4 T1
@@ -1197,7 +1197,7 @@
     T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
                   IIC_iALUr,
                   "sbc", "\t$Rdn, $Rm",
-                  [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>,
+                  []>,
                   Sched<[WriteALU]>;
 
 // Subtract immediate
@@ -1386,22 +1386,6 @@
 def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
             (tCMPr tGPR:$Rn, tGPR:$Rm)>;
 
-// Add with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
-            (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
-            (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
-            (tADDrr tGPR:$lhs, tGPR:$rhs)>;
-
-// Subtract with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
-            (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
-            (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
-def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
-            (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
-
 // Bswap 16 with load/store
 def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
             (tREV16 (tLDRHi t_addrmode_is2:$addr))>;
Index: test/CodeGen/Thumb/long.ll
===================================================================
--- test/CodeGen/Thumb/long.ll
+++ test/CodeGen/Thumb/long.ll
@@ -1,33 +1,49 @@
 ; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6-eabi %s -o - | \
+; RUN:    FileCheck %s -check-prefix CHECK -check-prefix CHECK-V6
 ; RUN: llc -mtriple=thumb-apple-darwin %s -o - | \
 ; RUN:    FileCheck %s -check-prefix CHECK -check-prefix CHECK-DARWIN
 
 define i64 @f1() {
 entry:
         ret i64 0
+; CHECK-LABEL: f1:
+; CHECK: movs r0, #0
+; CHECK-V6: mov r1, r0
 }
 
 define i64 @f2() {
 entry:
         ret i64 1
+; CHECK-LABEL: f2:
+; CHECK: movs r0, #1
+; CHECK: movs r1, #0
 }
 
 define i64 @f3() {
 entry:
         ret i64 2147483647
+; CHECK-LABEL: f3:
+; CHECK: ldr r0,
+; CHECK: movs r1, #0
 }
 
 define i64 @f4() {
 entry:
         ret i64 2147483648
+; CHECK-LABEL: f4:
+; CHECK: movs r0, #1
+; CHECK: lsls r0, r0, #31
+; CHECK: movs r1, #0
 }
 
 define i64 @f5() {
 entry:
         ret i64 9223372036854775807
 ; CHECK-LABEL: f5:
-; CHECK: mvn
-; CHECK-NOT: mvn
+; CHECK: movs r0, #0
+; CHECK: mvns r0, r0
+; CHECK: ldr r1,
 }
 
 define i64 @f6(i64 %x, i64 %y) {
@@ -35,14 +51,40 @@
         %tmp1 = add i64 %y, 1           ; <i64> [#uses=1]
         ret i64 %tmp1
 ; CHECK-LABEL: f6:
-; CHECK: adc
-; CHECK-NOT: adc
+; CHECK: adds r0, r2, #1
+; CHECK: movs r1, #0
+; CHECK: adcs r1, r3
+}
+
+define i64 @f6a(i64 %x, i64 %y) {
+entry:
+        %tmp1 = add i64 %y, 10
+        ret i64 %tmp1
+; CHECK-LABEL: f6a:
+; CHECK: adds r2, #10
+; CHECK: movs r1, #0
+; CHECK: adcs r1, r3
+; CHECK-V6: mov r0, r2
+}
+
+define i64 @f6b(i64 %x, i64 %y) {
+entry:
+        %tmp1 = add i64 %y, 1000
+        ret i64 %tmp1
+; CHECK-LABEL: f6b:
+; CHECK: movs r0, #125
+; CHECK: lsls r0, r0, #3
+; CHECK: adds r0, r2, r0
+; CHECK: movs r1, #0
+; CHECK: adcs r1, r3
 }
 
 define void @f7() {
 entry:
         %tmp = call i64 @f8( )          ; <i64> [#uses=0]
         ret void
+; CHECK-LABEL: f7:
+; CHECK: bl
 }
 
 declare i64 @f8()
@@ -52,8 +94,56 @@
         %tmp = sub i64 %a, %b           ; <i64> [#uses=1]
         ret i64 %tmp
 ; CHECK-LABEL: f9:
-; CHECK: sbc
-; CHECK-NOT: sbc
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
+}
+
+define i64 @f9a(i64 %x, i64 %y) { ; ADDC with small negative imm => SUBS imm
+entry:
+        %tmp1 = sub i64 %y, 10
+        ret i64 %tmp1
+; CHECK-LABEL: f9a:
+; CHECK: subs r2, #10
+; CHECK: movs r0, #0
+; CHECK: sbcs r3, r0
+; CHECK-V6: mov r0, r2
+; CHECK-V6: mov r1, r3
+}
+
+define i64 @f9b(i64 %x, i64 %y) { ; ADDC with big negative imm => SUBS reg
+entry:
+        %tmp1 = sub i64 1000, %y
+        ret i64 %tmp1
+; CHECK-LABEL: f9b:
+; CHECK: movs r0, #125
+; CHECK: lsls r0, r0, #3
+; CHECK: subs r0, r0, r2
+; CHECK: movs r1, #0
+; CHECK: sbcs r1, r3
+}
+
+define i64 @f9c(i64 %x, i32 %y) { ; SUBS with small positive imm => SUBS imm
+entry:
+        %conv = sext i32 %y to i64
+        %shl = shl i64 %conv, 32
+        %or = or i64 %shl, 1
+        %sub = sub nsw i64 %x, %or
+        ret i64 %sub
+; CHECK-LABEL: f9c:
+; CHECK: subs r0, r0, #1
+; CHECK: sbcs r1, r2
+}
+
+define i64 @f9d(i64 %x, i32 %y) { ; SUBS with small negative imm => ADDS imm
+entry:
+        %conv = sext i32 %y to i64
+        %shl = shl i64 %conv, 32
+        %or = or i64 %shl, 4294967295
+        %sub = sub nsw i64 %x, %or
+        ret i64 %sub
+; CHECK-LABEL: f9d:
+; CHECK: adds r0, r0, #1
+; CHECK: sbcs r1, r2
 }
 
 define i64 @f(i32 %a, i32 %b) {
@@ -63,6 +153,7 @@
         %tmp2 = mul i64 %tmp1, %tmp             ; <i64> [#uses=1]
         ret i64 %tmp2
 ; CHECK-LABEL: f:
+; CHECK-V6: bl __aeabi_lmul
 ; CHECK-DARWIN: __muldi3
 }
 
@@ -73,6 +164,7 @@
         %tmp2 = mul i64 %tmp1, %tmp             ; <i64> [#uses=1]
         ret i64 %tmp2
 ; CHECK-LABEL: g:
+; CHECK-V6: bl __aeabi_lmul
 ; CHECK-DARWIN: __muldi3
 }
 
@@ -81,4 +173,9 @@
         %a = alloca i64, align 8                ; <i64*> [#uses=1]
         %retval = load i64, i64* %a             ; <i64> [#uses=1]
         ret i64 %retval
+; CHECK-LABEL: f10:
+; CHECK: sub sp, #8
+; CHECK: ldr r0, [sp]
+; CHECK: ldr r1, [sp, #4]
+; CHECK: add sp, #8
 }