diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -795,6 +795,12 @@
   }];
 }
 
+// Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
+def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
+  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
+         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
+}]>;
+
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
@@ -1925,6 +1931,26 @@
                                     (s64imm_32bit:$C)))),
           (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                      (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+
+def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+
+def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
+def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
+          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
+
+def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
+          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
+          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+
+def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
+          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
+def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
+          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
 } // AddedComplexity = 5
 
 def : MulAccumWAlias<"mul", MADDWrrr>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -76,3 +76,831 @@
   %mul = mul nsw i64 %conv, %shr
   ret i64 %mul
 }
+
+define i64 @smull_ldrsb_b(i8* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsb_b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsb_b_commuted(i8* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsb_b_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w9, w8
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsb_h(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsb_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsb_w(i8* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsb_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_b(i16* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsh_b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_h(i16* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsh_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_h_commuted(i16* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsh_h_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w9, w8
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_w(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsh_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_b(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_b:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_h(i32* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsw_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_w(i32* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsw_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_w_commuted(i32* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsw_w_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    smull x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  ret i64 %mul
+}
+
+define i64 @smull_sext_bb(i8 %x0, i8 %x1) {
+; CHECK-LABEL: smull_sext_bb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtb x8, w0
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %sext = sext i8 %x0 to i64
+  %sext4 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_shift(i32* %x0, i64 %x1) {
+; CHECK-LABEL: smull_ldrsw_shift:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %shl = shl i64 %x1, 32
+  %shr = ashr exact i64 %shl, 32
+  %mul = mul i64 %sext, %shr
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsh_zextw(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smull_ldrsh_zextw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    mov w9, w1
+; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %zext = zext i32 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zexth(i32* %x0, i16 %x1) {
+; CHECK-LABEL: smull_ldrsw_zexth:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xffff
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i16 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zextb(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_zextb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  ret i64 %mul
+}
+
+define i64 @smull_ldrsw_zextb_commuted(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smull_ldrsw_zextb_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smull x0, w9, w8
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %zext, %sext
+  ret i64 %mul
+}
+
+define i64 @smaddl_ldrsb_h(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsb_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsb_h_commuted(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsb_h_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smaddl x0, w9, w8, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsh_w(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsh_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smaddl x0, w8, w1, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsh_w_commuted(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsh_w_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smaddl x0, w8, w1, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_b(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_b_commuted(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_b_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smaddl x0, w9, w8, x2
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext2, %sext
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_ldrsw(i32* %x0, i32* %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_ldrsw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    ldrsw x9, [x1]
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %ext64_2 = load i32, i32* %x1
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i32 %ext64_2 to i64
+  %mul = mul i64 %sext, %sext2
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_sext_hh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxth x8, w0
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %sext = sext i16 %x0 to i64
+  %sext2 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_shift(i32* %x0, i64 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_shift:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %shl = shl i64 %x1, 32
+  %shr = ashr exact i64 %shl, 32
+  %mul = mul i64 %sext, %shr
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smaddl_ldrsw_zextb(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smaddl_ldrsw_zextb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  %add = add i64 %x2, %mul
+  ret i64 %add
+}
+
+define i64 @smnegl_ldrsb_h(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smnegl_ldrsb_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsb_h_commuted(i8* %x0, i16 %x1) {
+; CHECK-LABEL: smnegl_ldrsb_h_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smnegl x0, w9, w8
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsh_w(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smnegl_ldrsh_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smnegl x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsh_w_commuted(i16* %x0, i32 %x1) {
+; CHECK-LABEL: smnegl_ldrsh_w_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smnegl x0, w8, w1
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_b(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_b_commuted(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_b_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smnegl x0, w9, w8
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext2, %sext
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_ldrsw(i32* %x0, i32* %x1) {
+; CHECK-LABEL: smnegl_ldrsw_ldrsw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    ldrsw x9, [x1]
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %ext64_2 = load i32, i32* %x1
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i32 %ext64_2 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_sext_hh(i16 %x0, i16 %x1) {
+; CHECK-LABEL: smnegl_sext_hh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxth x8, w0
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %sext = sext i16 %x0 to i64
+  %sext2 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_shift(i32* %x0, i64 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_shift:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %shl = shl i64 %x1, 32
+  %shr = ashr exact i64 %shl, 32
+  %mul = mul i64 %sext, %shr
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smnegl_ldrsw_zextb(i32* %x0, i8 %x1) {
+; CHECK-LABEL: smnegl_ldrsw_zextb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  %sub = sub i64 0, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsb_h(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsb_h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsb_h_commuted(i8* %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsb_h_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsb x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smsubl x0, w9, w8, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i8, i8* %x0
+  %sext = sext i8 %ext64 to i64
+  %sext4 = sext i16 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsh_w(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsh_w:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smsubl x0, w8, w1, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext, %sext4
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsh_w_commuted(i16* %x0, i32 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsh_w_commuted:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsh x8, [x0]
+; CHECK-NEXT:    smsubl x0, w8, w1, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i16, i16* %x0
+  %sext = sext i16 %ext64 to i64
+  %sext4 = sext i32 %x1 to i64
+  %mul = mul i64 %sext4, %sext
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_b(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_b_commuted(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_b_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtb x9, w1
+; CHECK-NEXT:    smsubl x0, w9, w8, x2
+; CHECK-NEXT:    ret
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i8 %x1 to i64
+  %mul = mul i64 %sext2, %sext
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_ldrsw(i32* %x0, i32* %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_ldrsw:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    ldrsw x9, [x1]
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %ext64_2 = load i32, i32* %x1
+  %sext = sext i32 %ext64 to i64
+  %sext2 = sext i32 %ext64_2 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_sext_hh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxth x8, w0
+; CHECK-NEXT:    sxth x9, w1
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %sext = sext i16 %x0 to i64
+  %sext2 = sext i16 %x1 to i64
+  %mul = mul i64 %sext, %sext2
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_shift(i32* %x0, i64 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_shift:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    sxtw x9, w1
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %shl = shl i64 %x1, 32
+  %shr = ashr exact i64 %shl, 32
+  %mul = mul i64 %sext, %shr
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smsubl_ldrsw_zextb(i32* %x0, i8 %x1, i64 %x2) {
+; CHECK-LABEL: smsubl_ldrsw_zextb:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrsw x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    ret
+entry:
+  %ext64 = load i32, i32* %x0
+  %sext = sext i32 %ext64 to i64
+  %zext = zext i8 %x1 to i64
+  %mul = mul i64 %sext, %zext
+  %sub = sub i64 %x2, %mul
+  ret i64 %sub
+}
+
+define i64 @smull_sext_ashr31(i32 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_sext_ashr31:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    asr x9, x1, #31
+; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    ret
+entry:
+  %tmp1 = sext i32 %a to i64
+  %c = ashr i64 %b, 31
+  %tmp3 = mul i64 %tmp1, %c
+  ret i64 %tmp3
+}
+
+define i64 @smull_sext_ashr32(i32 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_sext_ashr32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    asr x8, x1, #32
+; CHECK-NEXT:    smull x0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %tmp1 = sext i32 %a to i64
+  %c = ashr i64 %b, 32
+  %tmp3 = mul i64 %tmp1, %c
+  ret i64 %tmp3
+}
+
+
+define i64 @smull_ashr31_both(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_ashr31_both:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    asr x8, x0, #31
+; CHECK-NEXT:    asr x9, x1, #31
+; CHECK-NEXT:    mul x0, x8, x9
+; CHECK-NEXT:    ret
+entry:
+  %tmp1 = ashr i64 %a, 31
+  %c = ashr i64 %b, 31
+  %tmp3 = mul i64 %tmp1, %c
+  ret i64 %tmp3
+}
+
+define i64 @smull_ashr32_both(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: smull_ashr32_both:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    asr x8, x0, #32
+; CHECK-NEXT:    asr x9, x1, #32
+; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %tmp1 = ashr i64 %a, 32
+  %c = ashr i64 %b, 32
+  %tmp3 = mul i64 %tmp1, %c
+  ret i64 %tmp3
+}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -137,8 +137,8 @@
 ; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mov x10, v0.d[1]
-; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    mul x9, x11, x10
+; CHECK-NEXT:    smull x8, w8, w9
+; CHECK-NEXT:    smull x9, w11, w10
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    mov v0.d[1], x9
 ; CHECK-NEXT:    ret
@@ -162,8 +162,8 @@
 ; CHECK-NEXT:    fmov x10, d0
 ; CHECK-NEXT:    mov x8, v1.d[1]
 ; CHECK-NEXT:    mov x11, v0.d[1]
-; CHECK-NEXT:    mul x9, x10, x9
-; CHECK-NEXT:    mul x8, x11, x8
+; CHECK-NEXT:    smull x9, w10, w9
+; CHECK-NEXT:    smull x8, w11, w8
 ; CHECK-NEXT:    fmov d0, x9
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret
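
Illustrative note (not part of the patch): the new smullwithsignbits patterns fire whenever both i64 multiplicands are known to have more than 32 sign bits, so source code that mixes narrow sign-extending loads with 64-bit accumulation now selects smull/smaddl/smsubl/smnegl instead of a full 64-bit mul/madd/msub/mneg. A rough C sketch of the kind of code the new tests cover; the function and variable names are made up, and the build line is an assumption (roughly: clang -O2 --target=aarch64-linux-gnu -S example.c):

    /* *p sign-extends to 64 bits (ldrsw) and s sign-extends too (sxth),
       so the 64-bit multiply-add should now be selected as smaddl. */
    long long mla_step(const int *p, short s, long long acc) {
      return acc + (long long)*p * (long long)s;
    }

    /* Both operands have more than 32 sign bits, so the negated product
       should now be selected as smnegl rather than mul + neg. */
    long long neg_prod(const int *p, const int *q) {
      return -((long long)*p * (long long)*q);
    }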