Index: lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- lib/Target/ARM/ARMISelDAGToDAG.cpp +++ lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3037,6 +3037,33 @@ return; } } + case ARMISD::SUBE: { + if (!Subtarget->hasV6Ops()) + break; + // Look for a pattern to match SMMLS (result 1 of the SUBE must be unused): + // (sube Ra, (smul_lohi Rn, Rm):1, (subc 0, (smul_lohi Rn, Rm):0):1) + if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || + N->getOperand(2).getOpcode() != ARMISD::SUBC || + !SDValue(N, 1).use_empty()) + break; + + SDValue SmulLoHi = N->getOperand(1); + SDValue Subc = N->getOperand(2); + auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); + + if (!Zero || Zero->getZExtValue() != 0 || + Subc.getOperand(1) != SmulLoHi.getValue(0) || + N->getOperand(1) != SmulLoHi.getValue(1) || + N->getOperand(2) != Subc.getValue(1)) + break; + + unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS; + SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), + N->getOperand(0), getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); + return; + } case ISD::LOAD: { if (Subtarget->isThumb() && Subtarget->hasThumb2()) { if (tryT2IndexedLoad(N)) Index: lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- lib/Target/ARM/ARMInstrThumb2.td +++ lib/Target/ARM/ARMInstrThumb2.td @@ -2604,12 +2604,11 @@ let Inst{7-4} = op7_4; } -def t2SMMLA : T2FourRegSMMLA<0b101, 0b0000, "smmla", +def t2SMMLA : T2FourRegSMMLA<0b101, 0b0000, "smmla", [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>; -def t2SMMLAR: T2FourRegSMMLA<0b101, 0b0001, "smmlar", []>; -def t2SMMLS: T2FourRegSMMLA<0b110, 0b0000, "smmls", - [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>; -def t2SMMLSR:T2FourRegSMMLA<0b110, 0b0001, "smmlsr", []>; +def t2SMMLAR: T2FourRegSMMLA<0b101, 0b0001, "smmlar", []>; +def t2SMMLS: T2FourRegSMMLA<0b110, 0b0000, "smmls", []>; +def t2SMMLSR: T2FourRegSMMLA<0b110, 
0b0001, "smmlsr", []>; class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc, list<dag> pattern> @@ -2639,36 +2638,32 @@ def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>; def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>; -class T2FourRegSMLA<bits<2> op5_4, string opc, list<dag> pattern> +class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc, list<dag> pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b001; + let Inst{22-20} = op22_20; let Inst{7-6} = 0b00; let Inst{5-4} = op5_4; } -def t2SMLABB : T2FourRegSMLA<0b00, "smlabb", +def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb", [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16), (sext_inreg rGPR:$Rm, i16))))]>; -def t2SMLABT : T2FourRegSMLA<0b01, "smlabt", +def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt", [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16), (sra rGPR:$Rm, (i32 16)))))]>; -def t2SMLATB : T2FourRegSMLA<0b10, "smlatb", +def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb", [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), (sext_inreg rGPR:$Rm, i16))))]>; -def t2SMLATT : T2FourRegSMLA<0b11, "smlatt", +def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt", [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16)))))]>; -def t2SMLAWB : T2FourRegSMLA<0b00, "smlawb", []> { - let Inst{22-20} = 0b011; -} -def t2SMLAWT : T2FourRegSMLA<0b01, "smlawt", []> { - let Inst{22-20} = 0b011; -} +def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>; +def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>; class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern> : T2FourReg_mac<1, op22_20, op7_4, Index: test/CodeGen/ARM/smml.ll =================================================================== --- test/CodeGen/ARM/smml.ll +++ test/CodeGen/ARM/smml.ll @@ -1,8 +1,20 
@@ -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=CHECK +; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6 +; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7 +; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMB +; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMB +; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMBV6T2 +; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMBV7 -define i32 @f(i32 %a, i32 %b, i32 %c) nounwind readnone ssp { +define i32 @Test0(i32 %a, i32 %b, i32 %c) nounwind readnone ssp { entry: +; CHECK-LABEL: Test0 ; CHECK-NOT: smmls +; CHECK-V6-NOT: smmls +; CHECK-V7-NOT: smmls +; CHECK-THUMB-NOT: smmls +; CHECK-THUMBV6T2-NOT: smmls +; CHECK-THUMBV7-NOT: smmls %conv4 = zext i32 %a to i64 %conv1 = sext i32 %b to i64 %conv2 = sext i32 %c to i64 @@ -12,3 +24,23 @@ %conv3 = trunc i64 %sub to i32 ret i32 %conv3 } + +define i32 @Test1(i32 %a, i32 %b, i32 %c) { +;CHECK-LABEL: Test1 +;CHECK-NOT: smmls +;CHECK-THUMB-NOT: smmls +;CHECK-V6: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0 +;CHECK-V7: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0 +;CHECK-THUMBV6T2: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0 +;CHECK-THUMBV7: smmls r0, [[Rn:r[1-2]]], [[Rm:r[1-2]]], r0 +entry: + %conv = sext i32 %b to i64 + %conv1 = sext i32 %c to i64 + %mul = mul nsw i64 %conv1, %conv + %conv26 = zext i32 %a to i64 + %shl = shl nuw i64 %conv26, 32 + %sub = sub nsw i64 %shl, %mul + %shr7 = lshr i64 %sub, 32 + %conv3 = trunc i64 %shr7 to i32 + ret i32 %conv3 +}