Index: lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- lib/Target/ARM/ARMInstrInfo.td
+++ lib/Target/ARM/ARMInstrInfo.td
@@ -426,23 +426,7 @@
 
 // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
 def sext_16_node : PatLeaf<(i32 GPR:$a), [{
-  if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17)
-    return true;
-
-  if (N->getOpcode() != ISD::SRA)
-    return false;
-  if (N->getOperand(0).getOpcode() != ISD::SHL)
-    return false;
-
-  auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  if (!ShiftVal || ShiftVal->getZExtValue() != 16)
-    return false;
-
-  ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
-  if (!ShiftVal || ShiftVal->getZExtValue() != 16)
-    return false;
-
-  return true;
+  return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
 }]>;
 
 /// Split a 32-bit immediate into two 16 bit parts.
@@ -5818,6 +5802,9 @@
 def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
                  (SMULBB GPR:$a, GPR:$b)>,
       Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sext_inreg GPR:$b, i16)),
+                 (SMULBB GPR:$a, GPR:$b)>,
+      Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
                  (SMULBT GPR:$a, GPR:$b)>,
       Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
@@ -5829,6 +5816,10 @@
                  (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
       Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 def : ARMV5MOPat<(add GPR:$acc,
+                      (mul sext_16_node:$a, (sext_inreg GPR:$b, i16))),
+                 (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
+      Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
+def : ARMV5MOPat<(add GPR:$acc,
                       (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
                  (SMLABT GPR:$a, GPR:$b, GPR:$acc)>,
       Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
Index: lib/Target/ARM/ARMInstrThumb2.td
===================================================================
--- lib/Target/ARM/ARMInstrThumb2.td
+++ lib/Target/ARM/ARMInstrThumb2.td
@@ -2750,6 +2750,8 @@
 
 def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
                    (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(mul sext_16_node:$Rm, (sext_inreg rGPR:$Rn, i16)),
+                   (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
 def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))),
                    (t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
 def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
@@ -2800,6 +2802,9 @@
 
 def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
                       (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn,
+                                          (sext_inreg rGPR:$Rm, i16))),
+                      (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
 def : Thumb2DSPMulPat<(add rGPR:$Ra,
                       (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),
                       (t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
Index: test/CodeGen/ARM/smul.ll
===================================================================
--- test/CodeGen/ARM/smul.ll
+++ test/CodeGen/ARM/smul.ll
@@ -203,6 +203,7 @@
 
 define i32 @f17(i32 %x, i32 %y) {
 ; CHECK-LABEL: f17:
+; CHECK-NOT: sxth
 ; CHECK: smulbb
 ; CHECK-THUMBV6-NOT: smulbb
 %tmp1 = shl i32 %x, 16
@@ -215,6 +216,7 @@
 
 define i32 @f18(i32 %a, i32 %x, i32 %y) {
 ; CHECK-LABEL: f18:
+; CHECK-NOT: sxth
 ; CHECK: {{smlabt r0, r1, r2, r0|smlatb r0, r2, r1, r0}}
 ; CHECK-THUMBV6-NOT: {{smlabt|smlatb}}
 %tmp0 = shl i32 %x, 16
@@ -239,6 +241,7 @@
 
 define i32 @f20(i32 %a, i32 %x, i32 %y) {
 ; CHECK-LABEL: f20:
+; CHECK-NOT: sxth
 ; CHECK: smlabb
 ; CHECK-THUMBV6-NOT: smlabb
 %tmp1 = shl i32 %x, 16
@@ -267,6 +270,7 @@
 
 define i32 @f22(i32 %a) {
 ; CHECK-LABEL: f22:
+; CHECK-NOT: sxth
 ; CHECK: smulwb r0, r0, r1
 ; CHECK-THUMBV6-NOT: smulwb
 %b = load i16, i16* @global_b, align 2
@@ -280,6 +284,7 @@
 
 define i32 @f23(i32 %a, i32 %c) {
 ; CHECK-LABEL: f23:
+; CHECK-NOT: sxth
 ; CHECK: smlawb r0, r0, r2, r1
 ; CHECK-THUMBV6-NOT: smlawb
 %b = load i16, i16* @global_b, align 2
@@ -291,3 +296,77 @@
 %add = add nsw i32 %conv5, %c
 ret i32 %add
 }
+
+; CHECK-LABEL: f24
+; CHECK-NOT: sxth
+; CHECK: smulbb
+define i32 @f24(i16* %a, i32* %b, i32* %c) {
+  %ld.0 = load i16, i16* %a, align 2
+  %ld.1 = load i32, i32* %b, align 4
+  %conv.0 = sext i16 %ld.0 to i32
+  %shift = shl i32 %ld.1, 16
+  %conv.1 = ashr i32 %shift, 16
+  %mul.0 = mul i32 %conv.0, %conv.1
+  store i32 %ld.1, i32* %c
+  ret i32 %mul.0
+}
+
+; CHECK-LABEL: f25
+; CHECK-NOT: sxth
+; CHECK: smulbb
+define i32 @f25(i16* %a, i32 %b, i32* %c) {
+  %ld.0 = load i16, i16* %a, align 2
+  %conv.0 = sext i16 %ld.0 to i32
+  %shift = shl i32 %b, 16
+  %conv.1 = ashr i32 %shift, 16
+  %mul.0 = mul i32 %conv.0, %conv.1
+  store i32 %b, i32* %c
+  ret i32 %mul.0
+}
+
+; CHECK-LABEL: f26
+; CHECK-NOT: sxth
+; CHECK: {{smulbt | smultb }}
+define i32 @f26(i16* %a, i32 %b, i32* %c) {
+  %ld.0 = load i16, i16* %a, align 2
+  %conv.0 = sext i16 %ld.0 to i32
+  %conv.1 = ashr i32 %b, 16
+  %mul.0 = mul i32 %conv.0, %conv.1
+  store i32 %b, i32* %c
+  ret i32 %mul.0
+}
+
+; CHECK-LABEL: f27
+; CHECK-NOT: sxth
+; CHECK: smulbb
+; CHECK: {{smlabt | smlatb }}
+define i32 @f27(i16* %a, i32* %b) {
+  %ld.0 = load i16, i16* %a, align 2
+  %ld.1 = load i32, i32* %b, align 4
+  %conv.0 = sext i16 %ld.0 to i32
+  %shift = shl i32 %ld.1, 16
+  %conv.1 = ashr i32 %shift, 16
+  %conv.2 = ashr i32 %ld.1, 16
+  %mul.0 = mul i32 %conv.0, %conv.1
+  %mul.1 = mul i32 %conv.0, %conv.2
+  %add = add i32 %mul.0, %mul.1
+  ret i32 %add
+}
+
+; CHECK-LABEL: f28
+; CHECK-NOT: sxth
+; CHECK: smulbb
+; CHECK: {{smlabt | smlatb }}
+define i32 @f28(i16* %a, i32* %b) {
+  %ld.0 = load i16, i16* %a, align 2
+  %ld.1 = load i32, i32* %b, align 4
+  %conv.0 = sext i16 %ld.0 to i32
+  %shift = shl i32 %ld.1, 16
+  %conv.1 = ashr i32 %shift, 16
+  %conv.2 = ashr i32 %ld.1, 16
+  %mul.0 = mul i32 %conv.0, %conv.1
+  %mul.1 = mul i32 %conv.2, %conv.0
+  %add = add i32 %mul.0, %mul.1
+  ret i32 %add
+}
+