Index: lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- lib/Target/ARM/ARMISelDAGToDAG.cpp +++ lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -14,6 +14,7 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" +#include "ARMPatternHelpers.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -2564,32 +2565,19 @@ // For SM*WB, we need to some form of sext. // For SM*WT, we need to search for (sra X, 16) // Src1 then gets set to X. - if ((SignExt.getOpcode() == ISD::SIGN_EXTEND || - SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG || - SignExt.getOpcode() == ISD::AssertSext) && - SignExt.getValueType() == MVT::i32) { - + if (isSExt32(SignExt)) { *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB; Src1 = SignExt.getOperand(0); return true; } - if (SignExt.getOpcode() != ISD::SRA) - return false; - - ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1)); - if (!SRASrc1 || SRASrc1->getZExtValue() != 16) + if (!isSRA16(SignExt)) return false; SDValue Op0 = SignExt.getOperand(0); // The sign extend operand for SM*WB could be generated by a shl and ashr. - if (Op0.getOpcode() == ISD::SHL) { - SDValue SHL = Op0; - ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); - if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16) - return false; - + if (isSHL16(Op0)) { *Opc = Accumulate ? 
ARM::SMLAWB : ARM::SMULWB; Src1 = Op0.getOperand(0); return true; @@ -2612,14 +2600,8 @@ if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) { SRL = OR.getOperand(1); SHL = OR.getOperand(0); - if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) - return false; } - - ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1)); - ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); - if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 || - SHLSrc1->getZExtValue() != 16) + if (!isSRL16(SRL) || !isSHL16(SHL)) return false; // The first operands to the shifts need to be the two results from the @@ -3146,24 +3128,57 @@ } } case ARMISD::SMLAL:{ - if (Subtarget->isThumb()) { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), - N->getOperand(3), getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32)}; - ReplaceNode( - N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); + + if (Subtarget->isThumb2()) { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + ReplaceNode(N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, + MVT::i32, Ops)); return; - }else{ + } else { + unsigned Opcode = Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5; SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - ReplaceNode(N, CurDAG->getMachineNode( - Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, - MVT::i32, MVT::i32, Ops)); + ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, + Ops)); return; } } + case ARMISD::SMLALBB: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + unsigned Opcode = Subtarget->isThumb2() ? 
ARM::t2SMLALBB : ARM::SMLALBB; + ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, Ops)); + return; + } + case ARMISD::SMLALBT: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + unsigned Opcode = Subtarget->isThumb2() ? ARM::t2SMLALBT : ARM::SMLALBT; + ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, Ops)); + return; + } + case ARMISD::SMLALTB: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + unsigned Opcode = Subtarget->isThumb2() ? ARM::t2SMLALTB : ARM::SMLALTB; + ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, Ops)); + return; + } + case ARMISD::SMLALTT: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + unsigned Opcode = Subtarget->isThumb2() ? 
ARM::t2SMLALTT : ARM::SMLALTT; + ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, Ops)); + return; + } case ARMISD::SUBE: { if (!Subtarget->hasV6Ops()) break; Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -178,6 +178,10 @@ UMLAL, // 64bit Unsigned Accumulate Multiply SMLAL, // 64bit Signed Accumulate Multiply UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply + SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16 + SMLALBT, // 64-bit signed accumulate multiply bottom, top 16 + SMLALTB, // 64-bit signed accumulate multiply top, bottom 16 + SMLALTT, // 64-bit signed accumulate multiply top, top 16 // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -18,6 +18,7 @@ #include "ARMConstantPoolValue.h" #include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" +#include "ARMPatternHelpers.h" #include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" #include "ARMSelectionDAGInfo.h" @@ -1344,6 +1345,10 @@ case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; + case ARMISD::SMLALBB: return "ARMISD::SMLALBB"; + case ARMISD::SMLALBT: return "ARMISD::SMLALBT"; + case ARMISD::SMLALTB: return "ARMISD::SMLALTB"; + case ARMISD::SMLALTT: return "ARMISD::SMLALTT"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -9436,6 +9441,73 @@ return SDValue(); } +static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, + 
TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + if (!Subtarget->hasDSP()) + return SDValue(); + + // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and + // accumulates the product into a 64-bit value. The 16-bit values will + // be sign extended somehow or SRA'd into 32-bit values + // (addc (adde (mul 16bit, 16bit), lo), hi) + SDValue Mul = AddcNode->getOperand(0); + SDValue Hi = AddcNode->getOperand(1); + if (Mul.getOpcode() != ISD::MUL) { + Hi = AddcNode->getOperand(0); + Mul = AddcNode->getOperand(1); + if (Mul.getOpcode() != ISD::MUL) + return SDValue(); + } + + SDValue SRA = AddeNode->getOperand(0); + SDValue Lo = AddeNode->getOperand(1); + if (SRA.getOpcode() != ISD::SRA) { + SRA = AddeNode->getOperand(1); + Lo = AddeNode->getOperand(0); + if (SRA.getOpcode() != ISD::SRA) + return SDValue(); + } + if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) { + if (Const->getZExtValue() != 31) + return SDValue(); + } + if (SRA.getOperand(0) != Mul) + return SDValue(); + + unsigned Opcode = 0; + + if (isSExt32(Mul.getOperand(0)) && isSExt32(Mul.getOperand(1))) + Opcode = ARMISD::SMLALBB; + else if (isSExt32(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) + Opcode = ARMISD::SMLALBT; + else if (isSRA16(Mul.getOperand(0)) && isSExt32(Mul.getOperand(1))) + Opcode = ARMISD::SMLALTB; + else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) + Opcode = ARMISD::SMLALTT; + else { + return SDValue(); + } + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(AddcNode); + + SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), + Mul.getOperand(0).getOperand(0), + Mul.getOperand(1).getOperand(0), Lo, Hi); + // Replace the ADDs' nodes uses by the MLA node's values. 
+ SDValue HiMLALResult(SMLAL.getNode(), 1); + SDValue LoMLALResult(SMLAL.getNode(), 0); + + DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); + + // Return original node to notify the driver to stop replacing. + SDValue resNode(AddcNode, 0); + return resNode; +} + static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -9469,13 +9541,6 @@ if (AddcNode->getValueType(1) != MVT::Glue) return SDValue(); - // Check that the ADDC adds the low result of the S/UMUL_LOHI. - if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && - AddcOp0->getOpcode() != ISD::SMUL_LOHI && - AddcOp1->getOpcode() != ISD::UMUL_LOHI && - AddcOp1->getOpcode() != ISD::SMUL_LOHI) - return SDValue(); - // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); if (!AddeNode) @@ -9489,6 +9554,14 @@ AddeNode->getOperand(2).getValueType() == MVT::Glue && "ADDE node has the wrong inputs"); + // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it + // maybe a SMLAL which multiplies two 16-bit values. + if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && + AddcOp0->getOpcode() != ISD::SMUL_LOHI && + AddcOp1->getOpcode() != ISD::UMUL_LOHI && + AddcOp1->getOpcode() != ISD::SMUL_LOHI) + return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget); + // Check for the triangle shape. 
SDValue AddeOp0 = AddeNode->getOperand(0); SDValue AddeOp1 = AddeNode->getOperand(1); @@ -9648,6 +9721,7 @@ /// PerformADDCCombine - Target-specific dag combine transform from /// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or /// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL +/// ISD::ADDC, ISD::ADDE and ISD::MUL to SMLAL[B|T] static SDValue PerformADDCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -4173,29 +4173,19 @@ defm SMLA : AI_smla<"smla">; // Halfword multiply accumulate long: SMLAL. -def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, +class SMLAL<bits<2> opc1, string asm> + : AMulxyI64<0b0001010, opc1, + (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV5TE]>, Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; -def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; -def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; -def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - 
Requires<[IsARM, HasV5TE]>, - Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; +def SMLALBB : SMLAL<0b00, "smlalbb">; +def SMLALBT : SMLAL<0b10, "smlalbt">; +def SMLALTB : SMLAL<0b01, "smlaltb">; +def SMLALTT : SMLAL<0b11, "smlaltt">; // Helper class for AI_smld. class AMulDualIbase; -class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern> - : T2FourReg_mac<1, op22_20, op7_4, - (outs rGPR:$Ra, rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; - // Halfword multiple accumulate long: SMLAL -def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>; -def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>; -def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>; -def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>; +def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">, + Requires<[IsThumb2, HasDSP]>; +def t2SMLALBT : T2MlaLong<0b100, 0b1001, "smlalbt">, + Requires<[IsThumb2, HasDSP]>; +def t2SMLALTB : T2MlaLong<0b100, 0b1010, "smlaltb">, + Requires<[IsThumb2, HasDSP]>; +def t2SMLALTT : T2MlaLong<0b100, 0b1011, "smlaltt">, + Requires<[IsThumb2, HasDSP]>; class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc> : T2ThreeReg_mac<0, op22_20, op7_4, Index: lib/Target/ARM/ARMPatternHelpers.h =================================================================== --- /dev/null +++ lib/Target/ARM/ARMPatternHelpers.h @@ -0,0 +1,15 @@ +#ifndef _LLVM_LIB_TARGET_ARM_PATTERNS_H +#define _LLVM_LIB_TARGET_ARM_PATTERNS_H + +namespace llvm { + +class SDValue; + +bool isSExt32(const SDValue &Op); +bool isSRL16(const SDValue &Op); +bool isSRA16(const SDValue &Op); +bool isSHL16(const SDValue &Op); + +} + +#endif Index: lib/Target/ARM/ARMPatternHelpers.cpp =================================================================== --- /dev/null +++ lib/Target/ARM/ARMPatternHelpers.cpp @@ -0,0 +1,37 @@ +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" + +namespace llvm { + 
+bool isSExt32(const SDValue &Op) { + return ((Op.getOpcode() == ISD::SIGN_EXTEND || + Op.getOpcode() == ISD::SIGN_EXTEND_INREG || + Op.getOpcode() == ISD::AssertSext) && + Op.getValueType() == MVT::i32); +} + +bool isSRL16(const SDValue &Op) { + if (Op.getOpcode() != ISD::SRL) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 16; + return false; +} + +bool isSRA16(const SDValue &Op) { + if (Op.getOpcode() != ISD::SRA) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 16; + return false; +} + +bool isSHL16(const SDValue &Op) { + if (Op.getOpcode() != ISD::SHL) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 16; + return false; +} + +} Index: lib/Target/ARM/CMakeLists.txt =================================================================== --- lib/Target/ARM/CMakeLists.txt +++ lib/Target/ARM/CMakeLists.txt @@ -46,6 +46,7 @@ ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp + ARMPatternHelpers.cpp ARMRegisterInfo.cpp ARMOptimizeBarriersPass.cpp ARMSelectionDAGInfo.cpp Index: test/CodeGen/ARM/longMAC.ll =================================================================== --- test/CodeGen/ARM/longMAC.ll +++ test/CodeGen/ARM/longMAC.ll @@ -231,3 +231,139 @@ %add2 = add i64 %add, %mul ret i64 %add2 } + +define i64 @MACLongTest11(i16 %a, i16 %b, i64 %c) { +;CHECK-LABEL: MACLongTest11: +;CHECK-LE-NOT: smlalbb +;CHECK-BE-NOT: smlalbb +;CHECK-V6M-THUMB-NOT: smlalbb +;CHECK-V7M-THUMB-NOT: smlalbb +;CHECK-V6-THUMB2-NOT: sxth +;CHECK-V6-THUMB2-NOT: sxth +;CHECK-V6-THUMB2: smlalbb [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V6-THUMB2: mov r0, [[RDLO]] +;CHECK-V6-THUMB2: mov r1, [[RDHI]] +;CHECK-V7-THUMB-NOT: sxth +;CHECK-V7-THUMB-NOT: sxth +;CHECK-V7-THUMB: smlalbb [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7-THUMB: mov r0, [[RDLO]] +;CHECK-V7-THUMB: 
mov r1, [[RDHI]] +;CHECK-V7-THUMB-BE-NOT: sxth +;CHECK-V7-THUMB-BE-NOT: sxth +;CHECK-V7-THUMB-BE: smlalbb [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7-THUMB-BE: mov r0, [[RDHI]] +;CHECK-V7-THUMB-BE: mov r1, [[RDLO]] +;CHECK-V7EM-THUMB-NOT: sxth +;CHECK-V7EM-THUMB-NOT: sxth +;CHECK-V7EM-THUMB: smlalbb [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7EM-THUMB: mov r0, [[RDLO]] +;CHECK-V7EM-THUMB: mov r1, [[RDHI]] + %conv = sext i16 %a to i32 + %conv1 = sext i16 %b to i32 + %mul = mul nsw i32 %conv1, %conv + %conv2 = sext i32 %mul to i64 + %add = add nsw i64 %conv2, %c + ret i64 %add +} + +define i64 @MACLongTest12(i16 %a, i32 %b, i64 %c) { +;CHECK-LABEL: MACLongTest12: +;CHECK-LE-NOT: smlalbt +;CHECK-BE-NOT: smlalbt +;CHECK-V6M-THUMB-NOT: smlalbt +;CHECK-V7M-THUMB-NOT: smlalbt +;CHECK-V6-THUMB2-NOT: sxth +;CHECK-V6-THUMB2-NOT: asr +;CHECK-V6-THUMB2: smlalbt [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V6-THUMB2: mov r0, [[RDLO]] +;CHECK-V6-THUMB2: mov r1, [[RDHI]] +;CHECK-V7-THUMB-NOT: sxth +;CHECK-V7-THUMB-NOT: asr +;CHECK-V7-THUMB: smlalbt [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7-THUMB: mov r0, [[RDLO]] +;CHECK-V7-THUMB: mov r1, [[RDHI]] +;CHECK-V7-THUMB-BE-NOT: sxth +;CHECK-V7-THUMB-BE-NOT: asr +;CHECK-V7-THUMB-BE: smlalbt [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7-THUMB-BE: mov r0, [[RDHI]] +;CHECK-V7-THUMB-BE: mov r1, [[RDLO]] +;CHECK-V7EM-THUMB-NOT: sxth +;CHECK-V7EM-THUMB-NOT: asr +;CHECK-V7EM-THUMB: smlalbt [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7EM-THUMB: mov r0, [[RDLO]] +;CHECK-V7EM-THUMB: mov r1, [[RDHI]] + %conv0 = sext i16 %a to i32 + %conv1 = ashr i32 %b, 16 + %mul = mul nsw i32 %conv0, %conv1 + %conv2 = sext i32 %mul to i64 + %add = add nsw i64 %conv2, %c + ret i64 %add +} + +define i64 @MACLongTest13(i32 %a, i16 %b, i64 %c) { +;CHECK-LABEL: 
MACLongTest13: +;CHECK-LE-NOT: smlaltb +;CHECK-BE-NOT: smlaltb +;CHECK-V6M-THUMB-NOT: smlaltb +;CHECK-V7M-THUMB-NOT: smlaltb +;CHECK-V6-THUMB2-NOT: sxth +;CHECK-V6-THUMB2-NOT: asr +;CHECK-V6-THUMB2: smlaltb [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V6-THUMB2: mov r0, [[RDLO]] +;CHECK-V6-THUMB2: mov r1, [[RDHI]] +;CHECK-V7-THUMB-NOT: sxth +;CHECK-V7-THUMB-NOT: asr +;CHECK-V7-THUMB: smlaltb [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7-THUMB: mov r0, [[RDLO]] +;CHECK-V7-THUMB: mov r1, [[RDHI]] +;CHECK-V7-THUMB-BE-NOT: sxth +;CHECK-V7-THUMB-BE-NOT: asr +;CHECK-V7-THUMB-BE: smlaltb [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7-THUMB-BE: mov r0, [[RDHI]] +;CHECK-V7-THUMB-BE: mov r1, [[RDLO]] +;CHECK-V7EM-THUMB-NOT: sxth +;CHECK-V7EM-THUMB-NOT: asr +;CHECK-V7EM-THUMB: smlaltb [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7EM-THUMB: mov r0, [[RDLO]] +;CHECK-V7EM-THUMB: mov r1, [[RDHI]] + %conv0 = ashr i32 %a, 16 + %conv1= sext i16 %b to i32 + %mul = mul nsw i32 %conv0, %conv1 + %conv2 = sext i32 %mul to i64 + %add = add nsw i64 %conv2, %c + ret i64 %add +} + +define i64 @MACLongTest14(i32 %a, i32 %b, i64 %c) { +;CHECK-LABEL: MACLongTest14: +;CHECK-LE-NOT: smlaltt +;CHECK-BE-NOT: smlaltt +;CHECK-V6M-THUMB-NOT: smlaltt +;CHECK-V7M-THUMB-NOT: smlaltt +;CHECK-V6-THUMB2-NOT: asr +;CHECK-V6-THUMB2-NOT: asr +;CHECK-V6-THUMB2: smlaltt [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V6-THUMB2: mov r0, [[RDLO]] +;CHECK-V6-THUMB2: mov r1, [[RDHI]] +;CHECK-V7-THUMB-NOT: asr +;CHECK-V7-THUMB-NOT: asr +;CHECK-V7-THUMB: smlaltt [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7-THUMB: mov r0, [[RDLO]] +;CHECK-V7-THUMB: mov r1, [[RDHI]] +;CHECK-V7-THUMB-BE-NOT: asr +;CHECK-V7-THUMB-BE-NOT: asr +;CHECK-V7-THUMB-BE: smlaltt [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] 
+;CHECK-V7-THUMB-BE: mov r0, [[RDHI]] +;CHECK-V7-THUMB-BE: mov r1, [[RDLO]] +;CHECK-V7EM-THUMB-NOT: asr +;CHECK-V7EM-THUMB-NOT: asr +;CHECK-V7EM-THUMB: smlaltt [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]] +;CHECK-V7EM-THUMB: mov r0, [[RDLO]] +;CHECK-V7EM-THUMB: mov r1, [[RDHI]] + %conv0 = ashr i32 %a, 16 + %conv1 = ashr i32 %b, 16 + %mul = mul nsw i32 %conv1, %conv0 + %conv2 = sext i32 %mul to i64 + %add = add nsw i64 %conv2, %c + ret i64 %add +}