Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1921,6 +1921,28 @@
                      (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
 } // AddedComplexity = 5
 
+// Match a 64-bit mul whose operands both have more than 32 sign bits, i.e.
+// each operand equals the sign-extension of its own low 32 bits. SMULL on the
+// two low halves then yields the same 64-bit product.
+//
+// Both operands must be checked here. mul is commutative, so these patterns
+// are also matched with the operands swapped; a predicate that looked at
+// operand 0 only would then leave $Rn unchecked (e.g. the constant 0x80000002,
+// which has exactly 32 sign bits and must not be narrowed).
+def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
+  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
+         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
+}]>;
+let AddedComplexity = 5 in
+def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+
+// Same, but one operand is an explicit sext from i32: feed its 32-bit source
+// register to SMULL directly rather than extracting the low half.
+let AddedComplexity = 5 in
+def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+
 def : MulAccumWAlias<"mul", MADDWrrr>;
 def : MulAccumXAlias<"mul", MADDXrrr>;
 def : MulAccumWAlias<"mneg", MSUBWrrr>;
Index: llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
===================================================================
--- llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -76,3 +76,220 @@
   %mul = mul nsw i64 %conv, %shr
   ret i64 %mul
 }
+
+define i64 @smull_ldrsb_b(i8* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsb_b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsb_h(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsb_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsb_w(i8* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsb_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_b(i16* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsh_b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_h(i16* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsh_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_w(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsh_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_b(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_h(i32* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsw_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_w(i32* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsw_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_sext_bb(i8 %x0, i8 %x1) {
+; CHECK-LABEL: smull_sext_bb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtb x8, w0
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %sext = sext i8 %x0 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul nsw i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_shift(i32* %x0, i64 %x1) {
+; CHECK-LABEL: smull_ldrsw_shift:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %shl = shl i64 %x1, 32
+  %shr = ashr exact i64 %shl, 32
+  %mul = mul nsw i64 %sext, %shr
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_zextw(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsh_zextw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    mov w9, w1
+; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %zext = zext i32 %x1 to i64
+  %mul = mul nsw i64 %sext, %zext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zexth(i32* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsw_zexth:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xffff
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i16 %x1 to i64
+  %mul = mul nsw i64 %sext, %zext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zextb(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_zextb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul nsw i64 %sext, %zext
+  ret i64 %mul
+}
Index: llvm/test/CodeGen/AArch64/aarch64-smull.ll
===================================================================
--- llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -137,8 +137,8 @@
 ; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mov x10, v0.d[1]
-; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    mul x9, x11, x10
+; CHECK-NEXT:    smull x8, w8, w9
+; CHECK-NEXT:    smull x9, w11, w10
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    mov v0.d[1], x9
 ; CHECK-NEXT:    ret
@@ -162,8 +162,8 @@
 ; CHECK-NEXT:    fmov x10, d0
 ; CHECK-NEXT:    mov x8, v1.d[1]
 ; CHECK-NEXT:    mov x11, v0.d[1]
-; CHECK-NEXT:    mul x9, x10, x9
-; CHECK-NEXT:    mul x8, x11, x8
+; CHECK-NEXT:    smull x9, w10, w9
+; CHECK-NEXT:    smull x8, w11, w8
 ; CHECK-NEXT:    fmov d0, x9
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret