Index: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td @@ -999,6 +999,12 @@ class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP]; } +class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb2, HasDSP]; +} +class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP]; +} //===----------------------------------------------------------------------===// // Thumb Instruction Format Definitions. // Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td @@ -358,7 +358,23 @@ // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. def sext_16_node : PatLeaf<(i32 GPR:$a), [{ - return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17; + if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17) + return true; + + if (N->getOpcode() != ISD::SRA) + return false; + if (N->getOperand(0).getOpcode() != ISD::SHL) + return false; + + auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!ShiftVal || ShiftVal->getZExtValue() != 16) + return false; + + ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1)); + if (!ShiftVal || ShiftVal->getZExtValue() != 16) + return false; + + return true; }]>; /// Split a 32-bit immediate into two 16 bit parts. 
@@ -5492,45 +5508,22 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; // smul* and smla* -def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16))), - (SMULBB GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), (SMULBB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), (SMULBT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16))), - (SMULTB GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), (SMULTB GPR:$a, GPR:$b)>; - -def : ARMV5MOPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16)))), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5MOPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5MOPat<(add GPR:$acc, - (mul (sra GPR:$a, (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16)))), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; - // Pre-v7 uses MCR for synchronization barriers. 
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, Requires<[IsARM, HasV6]>; Index: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td @@ -2640,7 +2640,15 @@ def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>; def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>; -class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc, list<dag> pattern> +def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn), + (t2SMULBB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))), + (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm), + (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; + +class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc, + list<dag> pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, Requires<[IsThumb2, HasDSP, UseMulOps]> { @@ -2667,6 +2675,15 @@ def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>; def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>; +def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)), + (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; +def : Thumb2DSPMulPat<(add rGPR:$Ra, + (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))), + (t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; +def : Thumb2DSPMulPat<(add rGPR:$Ra, + (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)), + (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; + class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern> : T2FourReg_mac<1, op22_20, op7_4, (outs rGPR:$Ra, rGPR:$Rd), Index: llvm/trunk/test/CodeGen/ARM/smul.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/smul.ll +++ llvm/trunk/test/CodeGen/ARM/smul.ll @@ -1,43 +1,46 @@ ; RUN: llc -mtriple=arm-eabi -mcpu=generic %s -o /dev/null ; 
RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s ; RUN: llc -mtriple=thumb--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6t2-none-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6-none-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMBV6 -@x = weak global i16 0 ; [#uses=1] -@y = weak global i16 0 ; [#uses=0] - -define i32 @f1(i32 %y) { +define i32 @f1(i16 %x, i32 %y) { ; CHECK-LABEL: f1: -; CHECK: smulbt - %tmp = load i16, i16* @x ; [#uses=1] - %tmp1 = add i16 %tmp, 2 ; [#uses=1] - %tmp2 = sext i16 %tmp1 to i32 ; [#uses=1] - %tmp3 = ashr i32 %y, 16 ; [#uses=1] - %tmp4 = mul i32 %tmp2, %tmp3 ; [#uses=1] - ret i32 %tmp4 +; CHECK-NOT: sxth +; CHECK: {{smulbt r0, r0, r1|smultb r0, r1, r0}} +; CHECK-THUMBV6-NOT: {{smulbt|smultb}} + %tmp1 = sext i16 %x to i32 + %tmp2 = ashr i32 %y, 16 + %tmp3 = mul i32 %tmp2, %tmp1 + ret i32 %tmp3 } define i32 @f2(i32 %x, i32 %y) { ; CHECK-LABEL: f2: ; CHECK: smultt - %tmp1 = ashr i32 %x, 16 ; [#uses=1] - %tmp3 = ashr i32 %y, 16 ; [#uses=1] - %tmp4 = mul i32 %tmp3, %tmp1 ; [#uses=1] +; CHECK-THUMBV6-NOT: smultt + %tmp1 = ashr i32 %x, 16 + %tmp3 = ashr i32 %y, 16 + %tmp4 = mul i32 %tmp3, %tmp1 ret i32 %tmp4 } define i32 @f3(i32 %a, i16 %x, i32 %y) { ; CHECK-LABEL: f3: -; CHECK: smlabt - %tmp = sext i16 %x to i32 ; [#uses=1] - %tmp2 = ashr i32 %y, 16 ; [#uses=1] - %tmp3 = mul i32 %tmp2, %tmp ; [#uses=1] - %tmp5 = add i32 %tmp3, %a ; [#uses=1] +; CHECK-NOT: sxth +; CHECK: {{smlabt r0, r1, r2, r0|smlatb r0, r2, r1, r0}} +; CHECK-THUMBV6-NOT: {{smlabt|smlatb}} + %tmp = sext i16 %x to i32 + %tmp2 = ashr i32 %y, 16 + %tmp3 = mul i32 %tmp2, %tmp + %tmp5 = add i32 %tmp3, %a ret i32 %tmp5 } define i32 @f4(i32 %a, i32 %x, i32 %y) { ; CHECK-LABEL: f4: ; CHECK: smlatt +; CHECK-THUMBV6-NOT: smlatt %tmp1 = ashr i32 %x, 16 %tmp3 = ashr i32 %y, 16 %tmp4 = mul i32 %tmp3, %tmp1 @@ -47,7 +50,9 @@ define i32 @f5(i32 %a, i16 %x, i16 %y) { ; CHECK-LABEL: f5: +; CHECK-NOT: sxth ; CHECK: smlabb +; 
CHECK-THUMBV6-NOT: smlabb %tmp1 = sext i16 %x to i32 %tmp3 = sext i16 %y to i32 %tmp4 = mul i32 %tmp3, %tmp1 @@ -55,19 +60,22 @@ ret i32 %tmp5 } -define i32 @f6(i32 %a, i16 %x, i32 %y) { +define i32 @f6(i32 %a, i32 %x, i16 %y) { ; CHECK-LABEL: f6: -; CHECK: smlabt - %tmp1 = sext i16 %x to i32 - %tmp3 = ashr i32 %y, 16 - %tmp4 = mul i32 %tmp3, %tmp1 - %tmp5 = add i32 %tmp4, %a +; CHECK-NOT: sxth +; CHECK: {{smlatb r0, r1, r2, r0|smlabt r0, r2, r1, r0}} +; CHECK-THUMBV6-NOT: {{smlatb|smlabt}} + %tmp1 = sext i16 %y to i32 + %tmp2 = ashr i32 %x, 16 + %tmp3 = mul i32 %tmp2, %tmp1 + %tmp5 = add i32 %tmp3, %a ret i32 %tmp5 } define i32 @f7(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: f7: -; CHECK: smlawb +; CHECK: smlawb r0, r0, r1, r2 +; CHECK-THUMBV6-NOT: smlawb %shl = shl i32 %b, 16 %shr = ashr exact i32 %shl, 16 %conv = sext i32 %a to i64 @@ -81,7 +89,9 @@ define i32 @f8(i32 %a, i16 signext %b, i32 %c) { ; CHECK-LABEL: f8: -; CHECK: smlawb +; CHECK-NOT: sxth +; CHECK: smlawb r0, r0, r1, r2 +; CHECK-THUMBV6-NOT: smlawb %conv = sext i32 %a to i64 %conv1 = sext i16 %b to i64 %mul = mul nsw i64 %conv1, %conv @@ -93,7 +103,8 @@ define i32 @f9(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: f9: -; CHECK: smlawt +; CHECK: smlawt r0, r0, r1, r2 +; CHECK-THUMBV6-NOT: smlawt %conv = sext i32 %a to i64 %shr = ashr i32 %b, 16 %conv1 = sext i32 %shr to i64 @@ -104,9 +115,10 @@ ret i32 %add } -define i32 @f10(i32 %a, i32 %b, i32 %c) { +define i32 @f10(i32 %a, i32 %b) { ; CHECK-LABEL: f10: -; CHECK: smulwb +; CHECK: smulwb r0, r0, r1 +; CHECK-THUMBV6-NOT: smulwb %shl = shl i32 %b, 16 %shr = ashr exact i32 %shl, 16 %conv = sext i32 %a to i64 @@ -117,9 +129,11 @@ ret i32 %conv4 } -define i32 @f11(i32 %a, i16 signext %b, i32 %c) { +define i32 @f11(i32 %a, i16 signext %b) { ; CHECK-LABEL: f11: -; CHECK: smulwb +; CHECK-NOT: sxth +; CHECK: smulwb r0, r0, r1 +; CHECK-THUMBV6-NOT: smulwb %conv = sext i32 %a to i64 %conv1 = sext i16 %b to i64 %mul = mul nsw i64 %conv1, %conv @@ -128,9 +142,10 @@ ret 
i32 %conv2 } -define i32 @f12(i32 %a, i32 %b, i32 %c) { +define i32 @f12(i32 %a, i32 %b) { ; CHECK-LABEL: f12: -; CHECK: smulwt +; CHECK: smulwt r0, r0, r1 +; CHECK-THUMBV6-NOT: smulwt %conv = sext i32 %a to i64 %shr = ashr i32 %b, 16 %conv1 = sext i32 %shr to i64 @@ -139,3 +154,111 @@ %conv3 = trunc i64 %shr25 to i32 ret i32 %conv3 } + +define i32 @f13(i32 %x, i16 %y) { +; CHECK-LABEL: f13: +; CHECK-NOT: sxth +; CHECK: {{smultb r0, r0, r1|smulbt r0, r1, r0}} +; CHECK-THUMBV6-NOT: {{smultb|smulbt}} + %tmp1 = sext i16 %y to i32 + %tmp2 = ashr i32 %x, 16 + %tmp3 = mul i32 %tmp2, %tmp1 + ret i32 %tmp3 +} + +define i32 @f14(i32 %x, i32 %y) { +; CHECK-LABEL: f14: +; CHECK-NOT: sxth +; CHECK: {{smultb r0, r0, r1|smulbt r0, r1, r0}} +; CHECK-THUMBV6-NOT: {{smultb|smulbt}} + %tmp1 = shl i32 %y, 16 + %tmp2 = ashr i32 %tmp1, 16 + %tmp3 = ashr i32 %x, 16 + %tmp4 = mul i32 %tmp3, %tmp2 + ret i32 %tmp4 +} + +define i32 @f15(i32 %x, i32 %y) { +; CHECK-LABEL: f15: +; CHECK-NOT: sxth +; CHECK: {{smulbt r0, r0, r1|smultb r0, r1, r0}} +; CHECK-THUMBV6-NOT: {{smulbt|smultb}} + %tmp1 = shl i32 %x, 16 + %tmp2 = ashr i32 %tmp1, 16 + %tmp3 = ashr i32 %y, 16 + %tmp4 = mul i32 %tmp2, %tmp3 + ret i32 %tmp4 +} + +define i32 @f16(i16 %x, i16 %y) { +; CHECK-LABEL: f16: +; CHECK-NOT: sxth +; CHECK: smulbb +; CHECK-THUMBV6-NOT: smulbb + %tmp1 = sext i16 %x to i32 + %tmp2 = sext i16 %x to i32 + %tmp3 = mul i32 %tmp1, %tmp2 + ret i32 %tmp3 +} + +define i32 @f17(i32 %x, i32 %y) { +; CHECK-LABEL: f17: +; CHECK: smulbb +; CHECK-THUMBV6-NOT: smulbb + %tmp1 = shl i32 %x, 16 + %tmp2 = shl i32 %y, 16 + %tmp3 = ashr i32 %tmp1, 16 + %tmp4 = ashr i32 %tmp2, 16 + %tmp5 = mul i32 %tmp3, %tmp4 + ret i32 %tmp5 +} + +define i32 @f18(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: f18: +; CHECK: {{smlabt r0, r1, r2, r0|smlatb r0, r2, r1, r0}} +; CHECK-THUMBV6-NOT: {{smlabt|smlatb}} + %tmp0 = shl i32 %x, 16 + %tmp1 = ashr i32 %tmp0, 16 + %tmp2 = ashr i32 %y, 16 + %tmp3 = mul i32 %tmp2, %tmp1 + %tmp5 = add i32 %tmp3, %a 
+ ret i32 %tmp5 +} + +define i32 @f19(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: f19: +; CHECK: {{smlatb r0, r1, r2, r0|smlabt r0, r2, r1, r0}} +; CHECK-THUMBV6-NOT: {{smlatb|smlabt}} + %tmp0 = shl i32 %y, 16 + %tmp1 = ashr i32 %tmp0, 16 + %tmp2 = ashr i32 %x, 16 + %tmp3 = mul i32 %tmp2, %tmp1 + %tmp5 = add i32 %tmp3, %a + ret i32 %tmp5 +} + +define i32 @f20(i32 %a, i32 %x, i32 %y) { +; CHECK-LABEL: f20: +; CHECK: smlabb +; CHECK-THUMBV6-NOT: smlabb + %tmp1 = shl i32 %x, 16 + %tmp2 = ashr i32 %tmp1, 16 + %tmp3 = shl i32 %y, 16 + %tmp4 = ashr i32 %tmp3, 16 + %tmp5 = mul i32 %tmp2, %tmp4 + %tmp6 = add i32 %tmp5, %a + ret i32 %tmp6 +} + +define i32 @f21(i32 %a, i32 %x, i16 %y) { +; CHECK-LABEL: f21 +; CHECK-NOT: sxth +; CHECK: smlabb +; CHECK-THUMBV6-NOT: smlabb + %tmp1 = shl i32 %x, 16 + %tmp2 = ashr i32 %tmp1, 16 + %tmp3 = sext i16 %y to i32 + %tmp4 = mul i32 %tmp2, %tmp3 + %tmp5 = add i32 %a, %tmp4 + ret i32 %tmp5 +}