diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -920,6 +920,10 @@ def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>; def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>; +def zanyext : PatFrags<(ops node:$op), + [(zext node:$op), + (anyext node:$op)]>; + // null_frag - The null pattern operator is used in multiclass instantiations // which accept an SDPatternOperator for use in matching patterns for internal // definitions. When expanding a pattern, if the null fragment is referenced diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4765,18 +4765,18 @@ defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", AArch64uabd>; defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", - BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; + BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>; defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", - BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; + BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>; defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", - BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; + BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>; defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", - BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; + BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>; // Additional patterns for SMULL and UMULL multiclass Neon_mul_widen_patterns @test_vaddl_a8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vaddl_a8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b ; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret entry: @@ -119,9 +117,7 @@ define <4 x i32> @test_vaddl_a16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vaddl_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -136,9 +132,7 @@ define <2 x i64> @test_vaddl_a32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vaddl_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -237,9 +231,7 @@ define <8 x i16> @test_vaddl_high_a8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_vaddl_high_a8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0 -; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0 -; CHECK-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uaddl2 v0.8h, v0.16b, v1.16b ; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret entry: @@ -255,9 +247,7 @@ define <4 x i32> @test_vaddl_high_a16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vaddl_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 -; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uaddl2 v0.4s, v0.8h, v1.8h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -274,9 +264,7 @@ define <2 x i64> @test_vaddl_high_a32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vaddl_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0 -; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0 -; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: uaddl2 v0.2d, v0.4s, v1.4s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -359,8 +347,7 @@ define <8 x i16> @test_vaddw_a8(<8 x i16> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vaddw_a8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: add v0.8h, v1.8h, v0.8h +; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b ; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret entry: @@ -373,8 +360,7 @@ define <4 x i32> @test_vaddw_a16(<4 x i32> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vaddw_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -388,8 +374,7 @@ define <2 x i64> @test_vaddw_a32(<2 x i64> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vaddw_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: add v0.2d, v1.2d, v0.2d +; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -475,8 +460,7 @@ define <8 x i16> @test_vaddw_high_a8(<8 x i16> %a, <16 x i8> %b) { ; CHECK-LABEL: test_vaddw_high_a8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0 -; CHECK-NEXT: add v0.8h, v1.8h, v0.8h +; CHECK-NEXT: uaddw2 v0.8h, v0.8h, v1.16b ; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret entry: @@ -490,8 +474,7 @@ define <4 x i32> @test_vaddw_high_a16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vaddw_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: uaddw2 v0.4s, v0.4s, v1.8h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -506,8 +489,7 @@ define <2 x i64> @test_vaddw_high_a32(<2 x i64> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vaddw_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0 -; CHECK-NEXT: add v0.2d, v1.2d, v0.2d +; CHECK-NEXT: uaddw2 v0.2d, v0.2d, v1.4s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -594,9 +576,7 @@ define <8 x i16> @test_vsubl_a8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vsubl_a8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b ; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret entry: @@ -610,9 +590,7 @@ define <4 x i32> @test_vsubl_a16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vsubl_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: usubl v0.4s, v0.4h, v1.4h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -627,9 +605,7 @@ define <2 x i64> @test_vsubl_a32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vsubl_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d +; CHECK-NEXT: usubl v0.2d, v0.2s, v1.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -728,9 +704,7 @@ define <8 x i16> @test_vsubl_high_a8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_vsubl_high_a8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0 -; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0 -; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-NEXT: usubl2 v0.8h, v0.16b, v1.16b ; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret entry: @@ -746,9 +720,7 @@ define <4 x i32> @test_vsubl_high_a16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vsubl_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 -; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: usubl2 v0.4s, v0.8h, v1.8h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -765,9 +737,7 @@ define <2 x i64> @test_vsubl_high_a32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vsubl_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0 -; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0 -; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d +; CHECK-NEXT: usubl2 v0.2d, v0.4s, v1.4s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -850,8 +820,7 @@ define <8 x i16> @test_vsubw_a8(<8 x i16> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vsubw_a8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-NEXT: usubw v0.8h, v0.8h, v1.8b ; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret entry: @@ -864,8 +833,7 @@ define <4 x i32> @test_vsubw_a16(<4 x i32> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vsubw_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: usubw v0.4s, v0.4s, v1.4h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -879,8 +847,7 @@ define <2 x i64> @test_vsubw_a32(<2 x i64> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vsubw_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d +; CHECK-NEXT: usubw v0.2d, v0.2d, v1.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -966,8 +933,7 @@ define <8 x i16> @test_vsubw_high_a8(<8 x i16> %a, <16 x i8> %b) { ; CHECK-LABEL: test_vsubw_high_a8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0 -; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-NEXT: usubw2 v0.8h, v0.8h, v1.16b ; CHECK-NEXT: bic v0.8h, #255, lsl #8 ; CHECK-NEXT: ret entry: @@ -981,8 +947,7 @@ define <4 x i32> @test_vsubw_high_a16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vsubw_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: usubw2 v0.4s, v0.4s, v1.8h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -997,8 +962,7 @@ define <2 x i64> @test_vsubw_high_a32(<2 x i64> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vsubw_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0 -; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d +; CHECK-NEXT: usubw2 v0.2d, v0.2d, v1.4s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll --- a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll +++ b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll @@ -21,10 +21,10 @@ define <4 x i32> @mlai16_and(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) { ; CHECK-LABEL: mlai16_and: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: umlal v2.4s, v1.4h, v0.4h -; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: umull v0.4s, v1.4h, v0.4h +; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %v0 = sext <4 x i16> %vec0 to <4 x i32> @@ -157,10 +157,10 @@ define <2 x i64> @mlai32_and(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2) { ; CHECK-LABEL: mlai32_and: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-NEXT: umlal v2.2d, v1.2s, v0.2s -; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s +; CHECK-NEXT: uaddw v0.2d, v0.2d, v2.2s +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %v0 = sext <2 x i32> %vec0 to <2 x i64>