Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1951,6 +1951,26 @@ (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; + +def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)), + (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; +def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))), + (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; + +def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)), + (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; +def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)), + (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; + +def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))), + (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; +def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))), + (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; + +def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))), + (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; +def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))), + (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; } // AddedComplexity = 5 def : MulAccumWAlias<"mul", MADDWrrr>; Index: llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll +++ llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll @@ -123,8 +123,8 @@ ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: mul x9, x8, x9 -; CHECK-NEXT: mul x8, 
x8, x10 +; CHECK-NEXT: umull x9, w8, w9 +; CHECK-NEXT: umull x8, w8, w10 ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll +++ llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll @@ -904,3 +904,452 @@ %tmp3 = mul i64 %tmp1, %c ret i64 %tmp3 } + +define i64 @umull_ldrb_h(i8* %x0, i16 %x1) { +; CHECK-LABEL: umull_ldrb_h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xffff +; CHECK-NEXT: umull x0, w8, w9 +; CHECK-NEXT: ret +entry: + %ext64 = load i8, i8* %x0 + %zext = zext i8 %ext64 to i64 + %zext4 = zext i16 %x1 to i64 + %mul = mul i64 %zext, %zext4 + ret i64 %mul +} + +define i64 @umull_ldrb_h_commuted(i8* %x0, i16 %x1) { +; CHECK-LABEL: umull_ldrb_h_commuted: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xffff +; CHECK-NEXT: umull x0, w9, w8 +; CHECK-NEXT: ret +entry: + %ext64 = load i8, i8* %x0 + %zext = zext i8 %ext64 to i64 + %zext4 = zext i16 %x1 to i64 + %mul = mul i64 %zext4, %zext + ret i64 %mul +} + +define i64 @umull_ldrh_w(i16* %x0, i32 %x1) { +; CHECK-LABEL: umull_ldrh_w: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: umull x0, w8, w1 +; CHECK-NEXT: ret +entry: + %ext64 = load i16, i16* %x0 + %zext = zext i16 %ext64 to i64 + %zext4 = zext i32 %x1 to i64 + %mul = mul i64 %zext, %zext4 + ret i64 %mul +} + +define i64 @umull_ldr_b(i32* %x0, i8 %x1) { +; CHECK-LABEL: umull_ldr_b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xff +; CHECK-NEXT: umull x0, w8, w9 +; CHECK-NEXT: ret +entry: + %ext64 = load i32, i32* %x0 + %zext = zext i32 %ext64 to i64 + %zext4 = zext i8 
%x1 to i64 + %mul = mul i64 %zext, %zext4 + ret i64 %mul +} + +define i64 @umull_ldr2_w(i64* %x0, i32 %x1) { +; CHECK-LABEL: umull_ldr2_w: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: umull x0, w8, w1 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %zext4 = zext i32 %x1 to i64 + %mul = mul i64 %and, %zext4 + ret i64 %mul +} + +define i64 @umull_ldr2_ldr2(i64* %x0, i64* %x1) { +; CHECK-LABEL: umull_ldr2_ldr2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: umull x0, w8, w9 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %ext64_2 = load i64, i64* %x1 + %and2 = and i64 %ext64_2, 4294967295 + %mul = mul i64 %and, %and2 + ret i64 %mul +} + +define i64 @umull_ldr2_d(i64* %x0, i64 %x1) { +; CHECK-LABEL: umull_ldr2_d: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: and x9, x1, #0xffffffff +; CHECK-NEXT: umull x0, w8, w9 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %and2 = and i64 %x1, 4294967295 + %mul = mul i64 %and, %and2 + ret i64 %mul +} + +define i64 @umaddl_ldrb_h(i8* %x0, i16 %x1, i64 %x2) { +; CHECK-LABEL: umaddl_ldrb_h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xffff +; CHECK-NEXT: umaddl x0, w8, w9, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i8, i8* %x0 + %zext = zext i8 %ext64 to i64 + %zext4 = zext i16 %x1 to i64 + %mul = mul i64 %zext, %zext4 + %add = add i64 %mul, %x2 + ret i64 %add +} + +define i64 @umaddl_ldrb_h_commuted(i8* %x0, i16 %x1, i64 %x2) { +; CHECK-LABEL: umaddl_ldrb_h_commuted: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xffff +; CHECK-NEXT: umaddl x0, w9, w8, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i8, 
i8* %x0 + %zext = zext i8 %ext64 to i64 + %zext4 = zext i16 %x1 to i64 + %mul = mul i64 %zext4, %zext + %add = add i64 %mul, %x2 + ret i64 %add +} + +define i64 @umaddl_ldrh_w(i16* %x0, i32 %x1, i64 %x2) { +; CHECK-LABEL: umaddl_ldrh_w: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: umaddl x0, w8, w1, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i16, i16* %x0 + %zext = zext i16 %ext64 to i64 + %zext4 = zext i32 %x1 to i64 + %mul = mul i64 %zext, %zext4 + %add = add i64 %mul, %x2 + ret i64 %add +} + +define i64 @umaddl_ldr_b(i32* %x0, i8 %x1, i64 %x2) { +; CHECK-LABEL: umaddl_ldr_b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xff +; CHECK-NEXT: umaddl x0, w8, w9, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i32, i32* %x0 + %zext = zext i32 %ext64 to i64 + %zext4 = zext i8 %x1 to i64 + %mul = mul i64 %zext, %zext4 + %add = add i64 %mul, %x2 + ret i64 %add +} + +define i64 @umaddl_ldr2_w(i64* %x0, i32 %x1, i64 %x2) { +; CHECK-LABEL: umaddl_ldr2_w: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: umaddl x0, w8, w1, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %zext4 = zext i32 %x1 to i64 + %mul = mul i64 %and, %zext4 + %add = add i64 %mul, %x2 + ret i64 %add +} + +define i64 @umaddl_ldr2_ldr2(i64* %x0, i64* %x1, i64 %x2) { +; CHECK-LABEL: umaddl_ldr2_ldr2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: umaddl x0, w8, w9, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %ext64_2 = load i64, i64* %x1 + %and2 = and i64 %ext64_2, 4294967295 + %mul = mul i64 %and, %and2 + %add = add i64 %mul, %x2 + ret i64 %add +} + +define i64 @umaddl_ldr2_d(i64* %x0, i64 %x1, i64 %x2) { +; CHECK-LABEL: umaddl_ldr2_d: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: 
and x9, x1, #0xffffffff +; CHECK-NEXT: umaddl x0, w8, w9, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %and2 = and i64 %x1, 4294967295 + %mul = mul i64 %and, %and2 + %add = add i64 %mul, %x2 + ret i64 %add +} + +define i64 @umnegl_ldrb_h(i8* %x0, i16 %x1) { +; CHECK-LABEL: umnegl_ldrb_h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xffff +; CHECK-NEXT: umnegl x0, w8, w9 +; CHECK-NEXT: ret +entry: + %ext64 = load i8, i8* %x0 + %zext = zext i8 %ext64 to i64 + %zext4 = zext i16 %x1 to i64 + %mul = mul i64 %zext, %zext4 + %sub = sub i64 0, %mul + ret i64 %sub +} + +define i64 @umnegl_ldrb_h_commuted(i8* %x0, i16 %x1) { +; CHECK-LABEL: umnegl_ldrb_h_commuted: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xffff +; CHECK-NEXT: umnegl x0, w9, w8 +; CHECK-NEXT: ret +entry: + %ext64 = load i8, i8* %x0 + %zext = zext i8 %ext64 to i64 + %zext4 = zext i16 %x1 to i64 + %mul = mul i64 %zext4, %zext + %sub = sub i64 0, %mul + ret i64 %sub +} + +define i64 @umnegl_ldrh_w(i16* %x0, i32 %x1) { +; CHECK-LABEL: umnegl_ldrh_w: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: umnegl x0, w8, w1 +; CHECK-NEXT: ret +entry: + %ext64 = load i16, i16* %x0 + %zext = zext i16 %ext64 to i64 + %zext4 = zext i32 %x1 to i64 + %mul = mul i64 %zext, %zext4 + %sub = sub i64 0, %mul + ret i64 %sub +} + +define i64 @umnegl_ldr_b(i32* %x0, i8 %x1) { +; CHECK-LABEL: umnegl_ldr_b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xff +; CHECK-NEXT: umnegl x0, w8, w9 +; CHECK-NEXT: ret +entry: + %ext64 = load i32, i32* %x0 + %zext = zext i32 %ext64 to i64 + %zext4 = zext i8 %x1 to i64 + %mul = mul i64 %zext, %zext4 + %sub = sub i64 0, %mul + ret i64 %sub +} + 
+define i64 @umnegl_ldr2_w(i64* %x0, i32 %x1) { +; CHECK-LABEL: umnegl_ldr2_w: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: umnegl x0, w8, w1 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %zext4 = zext i32 %x1 to i64 + %mul = mul i64 %and, %zext4 + %sub = sub i64 0, %mul + ret i64 %sub +} + +define i64 @umnegl_ldr2_ldr2(i64* %x0, i64* %x1) { +; CHECK-LABEL: umnegl_ldr2_ldr2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: umnegl x0, w8, w9 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %ext64_2 = load i64, i64* %x1 + %and2 = and i64 %ext64_2, 4294967295 + %mul = mul i64 %and, %and2 + %sub = sub i64 0, %mul + ret i64 %sub +} + +define i64 @umnegl_ldr2_d(i64* %x0, i64 %x1) { +; CHECK-LABEL: umnegl_ldr2_d: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: and x9, x1, #0xffffffff +; CHECK-NEXT: umnegl x0, w8, w9 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %and2 = and i64 %x1, 4294967295 + %mul = mul i64 %and, %and2 + %sub = sub i64 0, %mul + ret i64 %sub +} + +define i64 @umsubl_ldrb_h(i8* %x0, i16 %x1, i64 %x2) { +; CHECK-LABEL: umsubl_ldrb_h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xffff +; CHECK-NEXT: umsubl x0, w8, w9, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i8, i8* %x0 + %zext = zext i8 %ext64 to i64 + %zext4 = zext i16 %x1 to i64 + %mul = mul i64 %zext, %zext4 + %sub = sub i64 %x2, %mul + ret i64 %sub +} + +define i64 @umsubl_ldrb_h_commuted(i8* %x0, i16 %x1, i64 %x2) { +; CHECK-LABEL: umsubl_ldrb_h_commuted: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xffff +; CHECK-NEXT: umsubl x0, w9, w8, x2 +; CHECK-NEXT: ret 
+entry: + %ext64 = load i8, i8* %x0 + %zext = zext i8 %ext64 to i64 + %zext4 = zext i16 %x1 to i64 + %mul = mul i64 %zext4, %zext + %sub = sub i64 %x2, %mul + ret i64 %sub +} + +define i64 @umsubl_ldrh_w(i16* %x0, i32 %x1, i64 %x2) { +; CHECK-LABEL: umsubl_ldrh_w: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: umsubl x0, w8, w1, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i16, i16* %x0 + %zext = zext i16 %ext64 to i64 + %zext4 = zext i32 %x1 to i64 + %mul = mul i64 %zext, %zext4 + %sub = sub i64 %x2, %mul + ret i64 %sub +} + +define i64 @umsubl_ldr_b(i32* %x0, i8 %x1, i64 %x2) { +; CHECK-LABEL: umsubl_ldr_b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x9, x1, #0xff +; CHECK-NEXT: umsubl x0, w8, w9, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i32, i32* %x0 + %zext = zext i32 %ext64 to i64 + %zext4 = zext i8 %x1 to i64 + %mul = mul i64 %zext, %zext4 + %sub = sub i64 %x2, %mul + ret i64 %sub +} + +define i64 @umsubl_ldr2_w(i64* %x0, i32 %x1, i64 %x2) { +; CHECK-LABEL: umsubl_ldr2_w: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: umsubl x0, w8, w1, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %zext4 = zext i32 %x1 to i64 + %mul = mul i64 %and, %zext4 + %sub = sub i64 %x2, %mul + ret i64 %sub +} + +define i64 @umsubl_ldr2_ldr2(i64* %x0, i64* %x1, i64 %x2) { +; CHECK-LABEL: umsubl_ldr2_ldr2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: umsubl x0, w8, w9, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %ext64_2 = load i64, i64* %x1 + %and2 = and i64 %ext64_2, 4294967295 + %mul = mul i64 %and, %and2 + %sub = sub i64 %x2, %mul + ret i64 %sub +} + +define i64 @umsubl_ldr2_d(i64* %x0, i64 %x1, i64 %x2) { +; CHECK-LABEL: umsubl_ldr2_d: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: 
ldr w8, [x0] +; CHECK-NEXT: and x9, x1, #0xffffffff +; CHECK-NEXT: umsubl x0, w8, w9, x2 +; CHECK-NEXT: ret +entry: + %ext64 = load i64, i64* %x0 + %and = and i64 %ext64, 4294967295 + %and2 = and i64 %x1, 4294967295 + %mul = mul i64 %and, %and2 + %sub = sub i64 %x2, %mul + ret i64 %sub +} Index: llvm/test/CodeGen/AArch64/addcarry-crash.ll =================================================================== --- llvm/test/CodeGen/AArch64/addcarry-crash.ll +++ llvm/test/CodeGen/AArch64/addcarry-crash.ll @@ -8,7 +8,7 @@ ; CHECK-NEXT: ldr w8, [x0, #4] ; CHECK-NEXT: lsr x9, x1, #32 ; CHECK-NEXT: cmn x3, x2 -; CHECK-NEXT: mul x8, x8, x9 +; CHECK-NEXT: umull x8, w8, w9 ; CHECK-NEXT: cinc x0, x8, hs ; CHECK-NEXT: ret entry: