Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -528,6 +528,12 @@
   return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
 }]>;
 
+def s64imm_32bit : ImmLeaf<i64, [{
+  int64_t Imm64 = static_cast<int64_t>(Imm);
+  return Imm64 >= std::numeric_limits<int32_t>::min() &&
+         Imm64 <= std::numeric_limits<int32_t>::max();
+}]>;
+
 def trunc_imm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
 }]>;
@@ -734,6 +740,40 @@
           (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
 def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
           (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+
+def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
+          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
+          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
+          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+
+def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
+          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
+          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
+          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+
+def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
+          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
+          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
+                    GPR64:$Ra)),
+          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+
+def : Pat<(i64 (sub (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
+          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (sub (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
+          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (sub (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
+                    GPR64:$Ra)),
+          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
 } // AddedComplexity = 5
 
 def : MulAccumWAlias<"mul", MADDWrrr>;
Index: test/CodeGen/AArch64/arm64-mul.ll
===================================================================
--- test/CodeGen/AArch64/arm64-mul.ll
+++ test/CodeGen/AArch64/arm64-mul.ll
@@ -88,3 +88,55 @@
   %tmp4 = sub i64 0, %tmp3
   ret i64 %tmp4
 }
+
+define i64 @t9(i32 %a) nounwind {
+entry:
+; CHECK-LABEL: t9:
+; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  %tmp1 = zext i32 %a to i64
+  %tmp2 = mul i64 %tmp1, 139968
+  ret i64 %tmp2
+}
+
+; Check 64-bit multiplication is used for constants > 32 bits.
+define i64 @t10(i32 %a) nounwind {
+entry:
+; CHECK-LABEL: t10:
+; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  %tmp1 = sext i32 %a to i64
+  %tmp2 = mul i64 %tmp1, 2147483650 ; = 2^31 + 2
+  ret i64 %tmp2
+}
+
+; Check the sext_inreg case.
+define i64 @t11(i64 %a) nounwind {
+entry:
+; CHECK-LABEL: t11:
+; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  %tmp1 = trunc i64 %a to i32
+  %tmp2 = sext i32 %tmp1 to i64
+  %tmp3 = mul i64 %tmp2, -2395238
+  %tmp4 = sub i64 0, %tmp3
+  ret i64 %tmp4
+}
+
+define i64 @t12(i64 %a, i64 %b) nounwind {
+entry:
+; CHECK-LABEL: t12:
+; CHECK: smaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+  %tmp1 = trunc i64 %a to i32
+  %tmp2 = sext i32 %tmp1 to i64
+  %tmp3 = mul i64 %tmp2, -34567890
+  %tmp4 = add i64 %b, %tmp3
+  ret i64 %tmp4
+}
+
+define i64 @t13(i32 %a, i64 %b) nounwind {
+entry:
+; CHECK-LABEL: t13:
+; CHECK: umsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+  %tmp1 = zext i32 %a to i64
+  %tmp3 = mul i64 %tmp1, 12345678
+  %tmp4 = sub i64 %tmp3, %b
+  ret i64 %tmp4
+}