diff --git a/llvm/include/llvm/Target/GlobalISel/Target.td b/llvm/include/llvm/Target/GlobalISel/Target.td --- a/llvm/include/llvm/Target/GlobalISel/Target.td +++ b/llvm/include/llvm/Target/GlobalISel/Target.td @@ -24,6 +24,7 @@ def s64 : LLT; def v2s32 : LLT; def v4s16 : LLT; +def v8s8 : LLT; // Defines a matcher for complex operands. This is analogous to ComplexPattern // from SelectionDAG. diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -133,6 +133,16 @@ def extract_high_v2i64 : ComplexPattern; +def gi_extract_high_v16i8 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_extract_high_v8i16 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_extract_high_v4i32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def extract_high_v8f16 : ComplexPattern; def extract_high_v4f32 : diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -7285,23 +7285,23 @@ def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; // Also match an extend from the upper half of a 128 bit source register. -def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), +def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), +def : Pat<(v8i16 (zext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), +def : Pat<(v8i16 (sext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), (SSHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), +def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), +def : Pat<(v4i32 (zext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), +def : Pat<(v4i32 (sext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), (SSHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), +def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), +def : Pat<(v2i64 (zext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), +def : Pat<(v2i64 (sext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), (SSHLLv4i32_shift V128:$Rn, (i32 0))>; // Vector shift sxtl aliases diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll --- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll @@ -85,21 +85,19 @@ ; GISEL-NEXT: movi v0.2d, #0000000000000000 ; GISEL-NEXT: ushll v1.8h, v1.8b, #0 ; GISEL-NEXT: ushll v2.8h, v2.8b, #0 -; GISEL-NEXT: mov d3, v1.d[1] -; GISEL-NEXT: mov d4, v2.d[1] -; GISEL-NEXT: usubl v1.4s, v1.4h, v2.4h -; GISEL-NEXT: usubl v2.4s, v3.4h, v4.4h -; GISEL-NEXT: cmgt v3.4s, v0.4s, v1.4s -; GISEL-NEXT: neg v4.4s, v1.4s -; GISEL-NEXT: shl v3.4s, v3.4s, #31 -; GISEL-NEXT: cmgt v0.4s, v0.4s, v2.4s -; GISEL-NEXT: neg v5.4s, v2.4s -; GISEL-NEXT: sshr v3.4s, v3.4s, #31 +; GISEL-NEXT: usubl v3.4s, v1.4h, v2.4h +; GISEL-NEXT: usubl2 v1.4s, v1.8h, v2.8h +; GISEL-NEXT: cmgt v2.4s, v0.4s, v3.4s +; GISEL-NEXT: cmgt v0.4s, v0.4s, v1.4s +; GISEL-NEXT: neg v4.4s, v3.4s +; GISEL-NEXT: neg v5.4s, v1.4s +; GISEL-NEXT: shl v2.4s, v2.4s, #31 ; GISEL-NEXT: shl v0.4s, v0.4s, #31 -; GISEL-NEXT: bit v1.16b, v4.16b, v3.16b +; GISEL-NEXT: sshr v2.4s, v2.4s, #31 ; GISEL-NEXT: sshr v0.4s, v0.4s, #31 -; GISEL-NEXT: bsl v0.16b, v5.16b, v2.16b -; GISEL-NEXT: add v0.4s, v1.4s, v0.4s +; GISEL-NEXT: bsl v2.16b, v4.16b, v3.16b +; GISEL-NEXT: bsl v0.16b, v5.16b, v1.16b +; GISEL-NEXT: add v0.4s, v2.4s, v0.4s ; GISEL-NEXT: addv s0, v0.4s ; GISEL-NEXT: fmov w0, s0 ; GISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -41,9 +41,9 @@ ; ; CHECK-GI-LABEL: func3: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: ushll.8h v0, v0, #0 -; CHECK-GI-NEXT: ushll.8h v1, v1, #0 +; CHECK-GI-NEXT: ushll.8h v2, v0, #0 +; CHECK-GI-NEXT: ushll2.8h v1, v0, #0 +; CHECK-GI-NEXT: mov.16b v0, v2 ; CHECK-GI-NEXT: ret %r = zext <16 x i8> %v0 to <16 x i16> ret <16 x i16> %r @@ -58,9 +58,9 @@ ; ; CHECK-GI-LABEL: func4: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: sshll.8h v0, v0, #0 -; CHECK-GI-NEXT: sshll.8h v1, v1, #0 +; CHECK-GI-NEXT: sshll.8h v2, v0, #0 +; CHECK-GI-NEXT: sshll2.8h v1, v0, #0 +; CHECK-GI-NEXT: mov.16b v0, v2 ; CHECK-GI-NEXT: ret %r = sext <16 x i8> %v0 to <16 x i16> ret <16 x i16> %r @@ -97,9 +97,9 @@ ; ; CHECK-GI-LABEL: afunc3: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: ushll.4s v0, v0, #0 -; CHECK-GI-NEXT: ushll.4s v1, v1, #0 +; CHECK-GI-NEXT: ushll.4s v2, v0, #0 +; CHECK-GI-NEXT: ushll2.4s v1, v0, #0 +; CHECK-GI-NEXT: mov.16b v0, v2 ; CHECK-GI-NEXT: ret %r = zext <8 x i16> %v0 to <8 x i32> ret <8 x i32> %r @@ -114,9 +114,9 @@ ; ; CHECK-GI-LABEL: afunc4: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: sshll.4s v0, v0, #0 -; CHECK-GI-NEXT: sshll.4s v1, v1, #0 +; CHECK-GI-NEXT: sshll.4s v2, v0, #0 +; CHECK-GI-NEXT: sshll2.4s v1, v0, #0 +; CHECK-GI-NEXT: mov.16b v0, v2 ; CHECK-GI-NEXT: ret %r = sext <8 x i16> %v0 to <8 x i32> ret <8 x i32> %r @@ -132,10 +132,9 @@ ; ; CHECK-GI-LABEL: bfunc1: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ushll.8h v0, v0, #0 -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: ushll.4s v0, v0, #0 -; CHECK-GI-NEXT: ushll.4s v1, v1, #0 +; CHECK-GI-NEXT: ushll.8h v1, v0, #0 +; CHECK-GI-NEXT: ushll.4s v0, v1, #0 +; CHECK-GI-NEXT: ushll2.4s v1, v1, #0 ; CHECK-GI-NEXT: ret %r = zext <8 x i8> %v0 to <8 x i32> ret <8 x i32> %r @@ -151,10 +150,9 @@ ; ; CHECK-GI-LABEL: bfunc2: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshll.8h v0, v0, #0 -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: sshll.4s v0, v0, #0 -; CHECK-GI-NEXT: sshll.4s v1, v1, #0 +; CHECK-GI-NEXT: sshll.8h v1, v0, #0 +; CHECK-GI-NEXT: sshll.4s v0, v1, #0 +; CHECK-GI-NEXT: sshll2.4s v1, v1, #0 ; CHECK-GI-NEXT: ret %r = sext <8 x i8> %v0 to <8 x i32> ret <8 x i32> %r @@ -173,9 +171,9 @@ ; ; CHECK-GI-LABEL: zfunc1: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: ushll.2d v0, v0, #0 -; CHECK-GI-NEXT: ushll.2d v1, v1, #0 +; CHECK-GI-NEXT: ushll.2d v2, v0, #0 +; CHECK-GI-NEXT: ushll2.2d v1, v0, #0 +; CHECK-GI-NEXT: mov.16b v0, v2 ; CHECK-GI-NEXT: ret %r = zext <4 x i32> %v0 to <4 x i64> ret <4 x i64> %r @@ -190,9 +188,9 @@ ; ; CHECK-GI-LABEL: zfunc2: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: sshll.2d v0, v0, #0 -; CHECK-GI-NEXT: sshll.2d v1, v1, #0 +; CHECK-GI-NEXT: sshll.2d v2, v0, #0 +; CHECK-GI-NEXT: sshll2.2d v1, v0, #0 +; CHECK-GI-NEXT: mov.16b v0, v2 ; CHECK-GI-NEXT: ret %r = sext <4 x i32> %v0 to <4 x i64> ret <4 x i64> %r @@ -208,10 +206,9 @@ ; ; CHECK-GI-LABEL: bfunc3: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ushll.4s v0, v0, #0 -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: ushll.2d v0, v0, #0 -; CHECK-GI-NEXT: ushll.2d v1, v1, #0 +; CHECK-GI-NEXT: ushll.4s v1, v0, #0 +; CHECK-GI-NEXT: ushll.2d v0, v1, #0 +; CHECK-GI-NEXT: ushll2.2d v1, v1, #0 ; CHECK-GI-NEXT: ret %r = zext <4 x i16> %v0 to <4 x i64> ret <4 x i64> %r @@ -227,10 +224,9 @@ ; ; CHECK-GI-LABEL: cfunc4: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshll.4s v0, v0, #0 -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: sshll.2d v0, v0, #0 -; CHECK-GI-NEXT: sshll.2d v1, v1, #0 +; CHECK-GI-NEXT: sshll.4s v1, v0, #0 +; CHECK-GI-NEXT: sshll.2d v0, v1, #0 +; CHECK-GI-NEXT: sshll2.2d v1, v1, #0 ; CHECK-GI-NEXT: ret %r = sext <4 x i16> %v0 to <4 x i64> ret <4 x i64> %r @@ -249,12 +245,11 @@ ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ushll.4s v0, v0, #0 ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: ushll.2d v0, v0, #0 -; CHECK-GI-NEXT: and.16b v0, v0, v2 -; CHECK-GI-NEXT: ushll.2d v1, v1, #0 -; CHECK-GI-NEXT: and.16b v1, v1, v2 +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI14_0] +; CHECK-GI-NEXT: ushll.2d v1, v0, #0 +; CHECK-GI-NEXT: ushll2.2d v2, v0, #0 +; CHECK-GI-NEXT: and.16b v0, v1, v3 +; CHECK-GI-NEXT: and.16b v1, v2, v3 ; CHECK-GI-NEXT: ret %r = zext <4 x i8> %v0 to <4 x i64> ret <4 x i64> %r @@ -275,13 +270,12 @@ ; CHECK-GI-LABEL: sext_v4i8_to_v4i64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ushll.4s v0, v0, #0 -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: ushll.2d v0, v0, #0 -; CHECK-GI-NEXT: shl.2d v0, v0, #56 -; CHECK-GI-NEXT: ushll.2d v1, v1, #0 -; CHECK-GI-NEXT: sshr.2d v0, v0, #56 +; CHECK-GI-NEXT: ushll.2d v1, v0, #0 +; CHECK-GI-NEXT: ushll2.2d v0, v0, #0 ; CHECK-GI-NEXT: shl.2d v1, v1, #56 -; CHECK-GI-NEXT: sshr.2d v1, v1, #56 +; CHECK-GI-NEXT: shl.2d v2, v0, #56 +; CHECK-GI-NEXT: sshr.2d v0, v1, #56 +; CHECK-GI-NEXT: sshr.2d v1, v2, #56 ; CHECK-GI-NEXT: ret %r = sext <4 x i8> %v0 to <4 x i64> ret <4 x i64> %r @@ -302,15 +296,12 @@ ; CHECK-GI-LABEL: zext_v8i8_to_v8i64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ushll.8h v0, v0, #0 -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: ushll.4s v0, v0, #0 -; CHECK-GI-NEXT: ushll.4s v2, v1, #0 -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: ushll.2d v0, v0, #0 -; CHECK-GI-NEXT: mov d3, v2[1] -; CHECK-GI-NEXT: ushll.2d v2, v2, #0 -; CHECK-GI-NEXT: ushll.2d v1, v1, #0 -; CHECK-GI-NEXT: ushll.2d v3, v3, #0 +; CHECK-GI-NEXT: ushll.4s v1, v0, #0 +; CHECK-GI-NEXT: ushll2.4s v3, v0, #0 +; CHECK-GI-NEXT: ushll.2d v0, v1, #0 +; CHECK-GI-NEXT: ushll2.2d v1, v1, #0 +; CHECK-GI-NEXT: ushll.2d v2, v3, #0 +; CHECK-GI-NEXT: ushll2.2d v3, v3, #0 ; CHECK-GI-NEXT: ret %r = zext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r @@ -331,15 +322,12 @@ ; CHECK-GI-LABEL: sext_v8i8_to_v8i64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sshll.8h v0, v0, #0 -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: sshll.4s v0, v0, #0 -; CHECK-GI-NEXT: sshll.4s v2, v1, #0 -; CHECK-GI-NEXT: mov d1, v0[1] -; CHECK-GI-NEXT: sshll.2d v0, v0, #0 -; CHECK-GI-NEXT: mov d3, v2[1] -; CHECK-GI-NEXT: sshll.2d v2, v2, #0 -; CHECK-GI-NEXT: sshll.2d v1, v1, #0 -; CHECK-GI-NEXT: sshll.2d v3, v3, #0 +; CHECK-GI-NEXT: sshll.4s v1, v0, #0 +; CHECK-GI-NEXT: sshll2.4s v3, v0, #0 +; CHECK-GI-NEXT: sshll.2d v0, v1, #0 +; CHECK-GI-NEXT: sshll2.2d v1, v1, #0 +; CHECK-GI-NEXT: sshll.2d v2, v3, #0 +; CHECK-GI-NEXT: sshll2.2d v3, v3, #0 ; CHECK-GI-NEXT: ret %r = sext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -287,44 +287,38 @@ ; ; CHECK-GI-LABEL: uabd16b_rdx_i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov d3, v0[1] -; CHECK-GI-NEXT: mov d4, v1[1] -; CHECK-GI-NEXT: ushll.8h v0, v0, #0 -; CHECK-GI-NEXT: ushll.8h v1, v1, #0 +; CHECK-GI-NEXT: ushll.8h v3, v0, #0 +; CHECK-GI-NEXT: ushll.8h v4, v1, #0 +; CHECK-GI-NEXT: ushll2.8h v0, v0, #0 +; CHECK-GI-NEXT: ushll2.8h v1, v1, #0 ; CHECK-GI-NEXT: movi.2d v2, #0000000000000000 -; CHECK-GI-NEXT: mov d5, v0[1] -; CHECK-GI-NEXT: ushll.8h v3, v3, #0 -; CHECK-GI-NEXT: ushll.8h v4, v4, #0 -; CHECK-GI-NEXT: mov d7, v1[1] -; CHECK-GI-NEXT: usubl.4s v0, v0, v1 -; CHECK-GI-NEXT: mov d6, v3[1] -; CHECK-GI-NEXT: mov d16, v4[1] -; CHECK-GI-NEXT: usubl.4s v3, v3, v4 -; CHECK-GI-NEXT: usubl.4s v1, v5, v7 -; CHECK-GI-NEXT: cmgt.4s v5, v2, v0 -; CHECK-GI-NEXT: usubl.4s v4, v6, v16 -; CHECK-GI-NEXT: cmgt.4s v7, v2, v3 -; CHECK-GI-NEXT: neg.4s v16, v0 -; CHECK-GI-NEXT: cmgt.4s v6, v2, v1 -; CHECK-GI-NEXT: shl.4s v5, v5, #31 -; CHECK-GI-NEXT: neg.4s v17, v1 -; CHECK-GI-NEXT: neg.4s v18, v3 -; CHECK-GI-NEXT: shl.4s v7, v7, #31 -; CHECK-GI-NEXT: cmgt.4s v2, v2, v4 +; CHECK-GI-NEXT: usubl.4s v5, v3, v4 +; CHECK-GI-NEXT: usubl2.4s v3, v3, v4 +; CHECK-GI-NEXT: usubl.4s v4, v0, v1 +; CHECK-GI-NEXT: usubl2.4s v0, v0, v1 +; CHECK-GI-NEXT: cmgt.4s v1, v2, v5 +; CHECK-GI-NEXT: cmgt.4s v6, v2, v3 +; CHECK-GI-NEXT: neg.4s v16, v5 +; CHECK-GI-NEXT: cmgt.4s v7, v2, v4 +; CHECK-GI-NEXT: cmgt.4s v2, v2, v0 +; CHECK-GI-NEXT: neg.4s v17, v3 +; CHECK-GI-NEXT: neg.4s v18, v4 +; CHECK-GI-NEXT: neg.4s v19, v0 +; CHECK-GI-NEXT: shl.4s v1, v1, #31 ; CHECK-GI-NEXT: shl.4s v6, v6, #31 -; CHECK-GI-NEXT: neg.4s v19, v4 -; CHECK-GI-NEXT: sshr.4s v5, v5, #31 -; CHECK-GI-NEXT: sshr.4s v7, v7, #31 +; CHECK-GI-NEXT: shl.4s v7, v7, #31 ; CHECK-GI-NEXT: shl.4s v2, v2, #31 +; CHECK-GI-NEXT: sshr.4s v1, v1, #31 ; CHECK-GI-NEXT: sshr.4s v6, v6, #31 -; CHECK-GI-NEXT: bit.16b v0, v16, v5 -; CHECK-GI-NEXT: bit.16b v3, v18, v7 +; CHECK-GI-NEXT: sshr.4s v7, v7, #31 ; CHECK-GI-NEXT: sshr.4s v2, v2, #31 -; CHECK-GI-NEXT: bit.16b v1, v17, v6 -; CHECK-GI-NEXT: bsl.16b v2, v19, v4 -; CHECK-GI-NEXT: add.4s v0, v0, v1 -; CHECK-GI-NEXT: add.4s v1, v3, v2 -; CHECK-GI-NEXT: add.4s v0, v0, v1 +; CHECK-GI-NEXT: bsl.16b v1, v16, v5 +; CHECK-GI-NEXT: bit.16b v3, v17, v6 +; CHECK-GI-NEXT: bit.16b v4, v18, v7 +; CHECK-GI-NEXT: bit.16b v0, v19, v2 +; CHECK-GI-NEXT: add.4s v1, v1, v3 +; CHECK-GI-NEXT: add.4s v0, v4, v0 +; CHECK-GI-NEXT: add.4s v0, v1, v0 ; CHECK-GI-NEXT: addv.4s s0, v0 ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret @@ -349,44 +343,38 @@ ; ; CHECK-GI-LABEL: sabd16b_rdx_i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov d3, v0[1] -; CHECK-GI-NEXT: mov d4, v1[1] -; CHECK-GI-NEXT: sshll.8h v0, v0, #0 -; CHECK-GI-NEXT: sshll.8h v1, v1, #0 +; CHECK-GI-NEXT: sshll.8h v3, v0, #0 +; CHECK-GI-NEXT: sshll.8h v4, v1, #0 +; CHECK-GI-NEXT: sshll2.8h v0, v0, #0 +; CHECK-GI-NEXT: sshll2.8h v1, v1, #0 ; CHECK-GI-NEXT: movi.2d v2, #0000000000000000 -; CHECK-GI-NEXT: mov d5, v0[1] -; CHECK-GI-NEXT: sshll.8h v3, v3, #0 -; CHECK-GI-NEXT: sshll.8h v4, v4, #0 -; CHECK-GI-NEXT: mov d7, v1[1] -; CHECK-GI-NEXT: ssubl.4s v0, v0, v1 -; CHECK-GI-NEXT: mov d6, v3[1] -; CHECK-GI-NEXT: mov d16, v4[1] -; CHECK-GI-NEXT: ssubl.4s v3, v3, v4 -; CHECK-GI-NEXT: ssubl.4s v1, v5, v7 -; CHECK-GI-NEXT: cmgt.4s v5, v2, v0 -; CHECK-GI-NEXT: ssubl.4s v4, v6, v16 -; CHECK-GI-NEXT: cmgt.4s v7, v2, v3 -; CHECK-GI-NEXT: neg.4s v16, v0 -; CHECK-GI-NEXT: cmgt.4s v6, v2, v1 -; CHECK-GI-NEXT: shl.4s v5, v5, #31 -; CHECK-GI-NEXT: neg.4s v17, v1 -; CHECK-GI-NEXT: neg.4s v18, v3 -; CHECK-GI-NEXT: shl.4s v7, v7, #31 -; CHECK-GI-NEXT: cmgt.4s v2, v2, v4 +; CHECK-GI-NEXT: ssubl.4s v5, v3, v4 +; CHECK-GI-NEXT: ssubl2.4s v3, v3, v4 +; CHECK-GI-NEXT: ssubl.4s v4, v0, v1 +; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1 +; CHECK-GI-NEXT: cmgt.4s v1, v2, v5 +; CHECK-GI-NEXT: cmgt.4s v6, v2, v3 +; CHECK-GI-NEXT: neg.4s v16, v5 +; CHECK-GI-NEXT: cmgt.4s v7, v2, v4 +; CHECK-GI-NEXT: cmgt.4s v2, v2, v0 +; CHECK-GI-NEXT: neg.4s v17, v3 +; CHECK-GI-NEXT: neg.4s v18, v4 +; CHECK-GI-NEXT: neg.4s v19, v0 +; CHECK-GI-NEXT: shl.4s v1, v1, #31 ; CHECK-GI-NEXT: shl.4s v6, v6, #31 -; CHECK-GI-NEXT: neg.4s v19, v4 -; CHECK-GI-NEXT: sshr.4s v5, v5, #31 -; CHECK-GI-NEXT: sshr.4s v7, v7, #31 +; CHECK-GI-NEXT: shl.4s v7, v7, #31 ; CHECK-GI-NEXT: shl.4s v2, v2, #31 +; CHECK-GI-NEXT: sshr.4s v1, v1, #31 ; CHECK-GI-NEXT: sshr.4s v6, v6, #31 -; CHECK-GI-NEXT: bit.16b v0, v16, v5 -; CHECK-GI-NEXT: bit.16b v3, v18, v7 +; CHECK-GI-NEXT: sshr.4s v7, v7, #31 ; CHECK-GI-NEXT: sshr.4s v2, v2, #31 -; CHECK-GI-NEXT: bit.16b v1, v17, v6 -; CHECK-GI-NEXT: bsl.16b v2, v19, v4 -; CHECK-GI-NEXT: add.4s v0, v0, v1 -; CHECK-GI-NEXT: add.4s v1, v3, v2 -; CHECK-GI-NEXT: add.4s v0, v0, v1 +; CHECK-GI-NEXT: bsl.16b v1, v16, v5 +; CHECK-GI-NEXT: bit.16b v3, v17, v6 +; CHECK-GI-NEXT: bit.16b v4, v18, v7 +; CHECK-GI-NEXT: bit.16b v0, v19, v2 +; CHECK-GI-NEXT: add.4s v1, v1, v3 +; CHECK-GI-NEXT: add.4s v0, v4, v0 +; CHECK-GI-NEXT: add.4s v0, v1, v0 ; CHECK-GI-NEXT: addv.4s s0, v0 ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret @@ -419,21 +407,19 @@ ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: ldr q2, [x1] ; CHECK-GI-NEXT: movi.2d v0, #0000000000000000 -; CHECK-GI-NEXT: mov d3, v1[1] -; CHECK-GI-NEXT: mov d4, v2[1] -; CHECK-GI-NEXT: usubl.4s v1, v1, v2 -; CHECK-GI-NEXT: usubl.4s v2, v3, v4 -; CHECK-GI-NEXT: cmgt.4s v3, v0, v1 -; CHECK-GI-NEXT: neg.4s v4, v1 -; CHECK-GI-NEXT: shl.4s v3, v3, #31 -; CHECK-GI-NEXT: cmgt.4s v0, v0, v2 -; CHECK-GI-NEXT: neg.4s v5, v2 -; CHECK-GI-NEXT: sshr.4s v3, v3, #31 +; CHECK-GI-NEXT: usubl.4s v3, v1, v2 +; CHECK-GI-NEXT: usubl2.4s v1, v1, v2 +; CHECK-GI-NEXT: cmgt.4s v2, v0, v3 +; CHECK-GI-NEXT: cmgt.4s v0, v0, v1 +; CHECK-GI-NEXT: neg.4s v4, v3 +; CHECK-GI-NEXT: neg.4s v5, v1 +; CHECK-GI-NEXT: shl.4s v2, v2, #31 ; CHECK-GI-NEXT: shl.4s v0, v0, #31 -; CHECK-GI-NEXT: bit.16b v1, v4, v3 +; CHECK-GI-NEXT: sshr.4s v2, v2, #31 ; CHECK-GI-NEXT: sshr.4s v0, v0, #31 -; CHECK-GI-NEXT: bsl.16b v0, v5, v2 -; CHECK-GI-NEXT: add.4s v0, v1, v0 +; CHECK-GI-NEXT: bsl.16b v2, v4, v3 +; CHECK-GI-NEXT: bsl.16b v0, v5, v1 +; CHECK-GI-NEXT: add.4s v0, v2, v0 ; CHECK-GI-NEXT: addv.4s s0, v0 ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret @@ -459,22 +445,20 @@ ; ; CHECK-GI-LABEL: sabd8h_rdx: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov d3, v0[1] -; CHECK-GI-NEXT: mov d4, v1[1] ; CHECK-GI-NEXT: movi.2d v2, #0000000000000000 -; CHECK-GI-NEXT: ssubl.4s v0, v0, v1 -; CHECK-GI-NEXT: ssubl.4s v1, v3, v4 -; CHECK-GI-NEXT: cmgt.4s v3, v2, v0 -; CHECK-GI-NEXT: neg.4s v4, v0 -; CHECK-GI-NEXT: cmgt.4s v2, v2, v1 -; CHECK-GI-NEXT: shl.4s v3, v3, #31 -; CHECK-GI-NEXT: neg.4s v5, v1 +; CHECK-GI-NEXT: ssubl.4s v3, v0, v1 +; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1 +; CHECK-GI-NEXT: cmgt.4s v1, v2, v3 +; CHECK-GI-NEXT: cmgt.4s v2, v2, v0 +; CHECK-GI-NEXT: neg.4s v4, v3 +; CHECK-GI-NEXT: neg.4s v5, v0 +; CHECK-GI-NEXT: shl.4s v1, v1, #31 ; CHECK-GI-NEXT: shl.4s v2, v2, #31 -; CHECK-GI-NEXT: sshr.4s v3, v3, #31 +; CHECK-GI-NEXT: sshr.4s v1, v1, #31 ; CHECK-GI-NEXT: sshr.4s v2, v2, #31 -; CHECK-GI-NEXT: bit.16b v0, v4, v3 -; CHECK-GI-NEXT: bit.16b v1, v5, v2 -; CHECK-GI-NEXT: add.4s v0, v0, v1 +; CHECK-GI-NEXT: bsl.16b v1, v4, v3 +; CHECK-GI-NEXT: bit.16b v0, v5, v2 +; CHECK-GI-NEXT: add.4s v0, v1, v0 ; CHECK-GI-NEXT: addv.4s s0, v0 ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll --- a/llvm/test/CodeGen/AArch64/sext.ll +++ b/llvm/test/CodeGen/AArch64/sext.ll @@ -474,13 +474,12 @@ ; CHECK-GI-LABEL: sext_v4i8_v4i64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56 -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #56 +; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0 ; CHECK-GI-NEXT: shl v1.2d, v1.2d, #56 -; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #56 +; CHECK-GI-NEXT: shl v2.2d, v0.2d, #56 +; CHECK-GI-NEXT: sshr v0.2d, v1.2d, #56 +; CHECK-GI-NEXT: sshr v1.2d, v2.2d, #56 ; CHECK-GI-NEXT: ret entry: %c = sext <4 x i8> %a to <4 x i64> @@ -507,10 +506,9 @@ ; ; CHECK-GI-LABEL: sext_v4i16_v4i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = sext <4 x i16> %a to <4 x i64> @@ -526,9 +524,9 @@ ; ; CHECK-GI-LABEL: sext_v4i32_v4i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v0.4s, #0 +; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: ret entry: %c = sext <4 x i32> %a to <4 x i64> @@ -573,13 +571,12 @@ ; CHECK-GI-LABEL: sext_v4i10_v4i64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54 -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54 +; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0 ; CHECK-GI-NEXT: shl v1.2d, v1.2d, #54 -; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #54 +; CHECK-GI-NEXT: shl v2.2d, v0.2d, #54 +; CHECK-GI-NEXT: sshr v0.2d, v1.2d, #54 +; CHECK-GI-NEXT: sshr v1.2d, v2.2d, #54 ; CHECK-GI-NEXT: ret entry: %c = sext <4 x i10> %a to <4 x i64> @@ -606,10 +603,9 @@ ; ; CHECK-GI-LABEL: sext_v8i8_v8i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 ; CHECK-GI-NEXT: ret entry: %c = sext <8 x i8> %a to <8 x i32> @@ -631,15 +627,12 @@ ; CHECK-GI-LABEL: sext_v8i8_v8i64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: mov d3, v2.d[1] -; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = sext <8 x i8> %a to <8 x i64> @@ -655,9 +648,9 @@ ; ; CHECK-GI-LABEL: sext_v8i16_v8i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: ret entry: %c = sext <8 x i16> %a to <8 x i32> @@ -677,15 +670,12 @@ ; ; CHECK-GI-LABEL: sext_v8i16_v8i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: mov d3, v2.d[1] -; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = sext <8 x i16> %a to <8 x i64> @@ -705,13 +695,12 @@ ; ; CHECK-GI-LABEL: sext_v8i32_v8i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v0.4s, #0 ; CHECK-GI-NEXT: sshll v2.2d, v1.2s, #0 -; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0 -; CHECK-GI-NEXT: mov v1.16b, v4.16b +; CHECK-GI-NEXT: sshll2 v3.2d, v1.4s, #0 +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b ; CHECK-GI-NEXT: ret entry: %c = sext <8 x i32> %a to <8 x i64> @@ -742,13 +731,12 @@ ; ; CHECK-GI-LABEL: sext_v8i10_v8i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: shl v0.4s, v0.4s, #22 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #22 +; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #22 -; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #22 +; CHECK-GI-NEXT: shl v2.4s, v0.4s, #22 +; CHECK-GI-NEXT: sshr v0.4s, v1.4s, #22 +; CHECK-GI-NEXT: sshr v1.4s, v2.4s, #22 ; CHECK-GI-NEXT: ret entry: %c = sext <8 x i10> %a to <8 x i32> @@ -776,23 +764,20 @@ ; ; CHECK-GI-LABEL: sext_v8i10_v8i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54 -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54 +; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: ushll v3.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0 ; CHECK-GI-NEXT: shl v2.2d, v2.2d, #54 -; CHECK-GI-NEXT: shl v4.2d, v1.2d, #54 -; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0 -; CHECK-GI-NEXT: sshr v1.2d, v2.2d, #54 -; CHECK-GI-NEXT: sshr v2.2d, v4.2d, #54 +; CHECK-GI-NEXT: shl v1.2d, v1.2d, #54 ; CHECK-GI-NEXT: shl v3.2d, v3.2d, #54 -; CHECK-GI-NEXT: sshr v3.2d, v3.2d, #54 +; CHECK-GI-NEXT: shl v4.2d, v0.2d, #54 +; CHECK-GI-NEXT: sshr v0.2d, v2.2d, #54 +; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #54 +; CHECK-GI-NEXT: sshr v2.2d, v3.2d, #54 +; CHECK-GI-NEXT: sshr v3.2d, v4.2d, #54 ; CHECK-GI-NEXT: ret entry: %c = sext <8 x i10> %a to <8 x i64> @@ -808,9 +793,9 @@ ; ; CHECK-GI-LABEL: sext_v16i8_v16i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: ret entry: %c = sext <16 x i8> %a to <16 x i16> @@ -830,15 +815,12 @@ ; ; CHECK-GI-LABEL: sext_v16i8_v16i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: sshll v2.8h, v1.8b, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov d3, v2.d[1] -; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: sshll v3.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll2 v3.8h, v0.16b, #0 +; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 ; CHECK-GI-NEXT: ret entry: %c = sext <16 x i8> %a to <16 x i32> @@ -866,27 +848,20 @@ ; ; CHECK-GI-LABEL: sext_v16i8_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: sshll v4.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: mov d5, v4.d[1] -; CHECK-GI-NEXT: sshll v4.2d, v4.2s, #0 -; CHECK-GI-NEXT: sshll v6.4s, v3.4h, #0 -; CHECK-GI-NEXT: mov d3, v2.d[1] -; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: mov d7, v6.d[1] -; CHECK-GI-NEXT: sshll v5.2d, v5.2s, #0 -; CHECK-GI-NEXT: sshll v6.2d, v6.2s, #0 -; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0 -; CHECK-GI-NEXT: sshll v7.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0 +; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v1.8h, #0 +; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v7.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = sext <16 x i8> %a to <16 x i64> @@ -906,13 +881,12 @@ ; ; CHECK-GI-LABEL: sext_v16i16_v16i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: sshll v4.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll v4.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v5.4s, v0.8h, #0 ; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0 -; CHECK-GI-NEXT: sshll v3.4s, v3.4h, #0 -; CHECK-GI-NEXT: mov v1.16b, v4.16b +; CHECK-GI-NEXT: sshll2 v3.4s, v1.8h, #0 +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b ; CHECK-GI-NEXT: ret entry: %c = sext <16 x i16> %a to <16 x i32> @@ -938,24 +912,18 @@ ; ; CHECK-GI-LABEL: sext_v16i16_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: sshll v6.4s, v3.4h, #0 -; CHECK-GI-NEXT: mov d3, v0.d[1] -; CHECK-GI-NEXT: mov d7, v1.d[1] -; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: sshll v4.2d, v1.2s, #0 -; CHECK-GI-NEXT: mov d5, v2.d[1] -; CHECK-GI-NEXT: mov d16, v6.d[1] -; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: sshll v1.2d, v3.2s, #0 -; CHECK-GI-NEXT: sshll v6.2d, v6.2s, #0 -; CHECK-GI-NEXT: sshll v3.2d, v5.2s, #0 -; CHECK-GI-NEXT: sshll v5.2d, v7.2s, #0 -; CHECK-GI-NEXT: sshll v7.2d, v16.2s, #0 +; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v5.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll2 v7.4s, v1.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = sext <16 x i16> %a to <16 x i64> @@ -980,20 +948,18 @@ ; ; CHECK-GI-LABEL: sext_v16i32_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d6, v1.d[1] -; CHECK-GI-NEXT: mov d5, v0.d[1] -; CHECK-GI-NEXT: mov d7, v2.d[1] -; CHECK-GI-NEXT: mov d18, v3.d[1] -; CHECK-GI-NEXT: sshll v16.2d, v1.2s, #0 -; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshll v16.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshll2 v17.2d, v0.4s, #0 +; CHECK-GI-NEXT: sshll v18.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v19.2d, v1.4s, #0 ; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0 -; CHECK-GI-NEXT: sshll v17.2d, v6.2s, #0 -; CHECK-GI-NEXT: sshll v1.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v2.4s, #0 ; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0 -; CHECK-GI-NEXT: sshll v5.2d, v7.2s, #0 -; CHECK-GI-NEXT: sshll v7.2d, v18.2s, #0 -; CHECK-GI-NEXT: mov v2.16b, v16.16b -; CHECK-GI-NEXT: mov v3.16b, v17.16b +; CHECK-GI-NEXT: sshll2 v7.2d, v3.4s, #0 +; CHECK-GI-NEXT: mov v0.16b, v16.16b +; CHECK-GI-NEXT: mov v1.16b, v17.16b +; CHECK-GI-NEXT: mov v2.16b, v18.16b +; CHECK-GI-NEXT: mov v3.16b, v19.16b ; CHECK-GI-NEXT: ret entry: %c = sext <16 x i32> %a to <16 x i64> diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll --- a/llvm/test/CodeGen/AArch64/zext.ll +++ b/llvm/test/CodeGen/AArch64/zext.ll @@ -597,12 +597,11 @@ ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: adrp x8, .LCPI30_0 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI30_0] -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI30_0] +; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll2 v2.2d, v0.4s, #0 +; CHECK-GI-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: and v1.16b, v2.16b, v3.16b ; CHECK-GI-NEXT: ret entry: %c = zext <4 x i8> %a to <4 x i64> @@ -629,10 +628,9 @@ ; ; CHECK-GI-LABEL: zext_v4i16_v4i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = zext <4 x i16> %a to <4 x i64> @@ -648,9 +646,9 @@ ; ; CHECK-GI-LABEL: zext_v4i32_v4i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v0.4s, #0 +; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: ret entry: %c = zext <4 x i32> %a to <4 x i64> @@ -706,12 +704,11 @@ ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: adrp x8, .LCPI36_0 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI36_0] +; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll2 v2.2d, v0.4s, #0 +; CHECK-GI-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: and v1.16b, v2.16b, v3.16b ; CHECK-GI-NEXT: ret entry: %c = zext <4 x i10> %a to <4 x i64> @@ -738,10 +735,9 @@ ; ; CHECK-GI-LABEL: zext_v8i8_v8i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0 ; CHECK-GI-NEXT: ret entry: %c = zext <8 x i8> %a to <8 x i32> @@ -763,15 +759,12 @@ ; CHECK-GI-LABEL: zext_v8i8_v8i64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: mov d3, v2.d[1] -; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = zext <8 x i8> %a to <8 x i64> @@ -787,9 +780,9 @@ ; ; CHECK-GI-LABEL: zext_v8i16_v8i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: ret entry: %c = zext <8 x i16> %a to <8 x i32> @@ -809,15 +802,12 @@ ; ; CHECK-GI-LABEL: zext_v8i16_v8i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: mov d3, v2.d[1] -; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = zext <8 x i16> %a to <8 x i64> @@ -837,13 +827,12 @@ ; ; CHECK-GI-LABEL: zext_v8i32_v8i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0 +; CHECK-GI-NEXT: ushll v4.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll2 v5.2d, v0.4s, #0 ; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0 -; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0 -; CHECK-GI-NEXT: mov v1.16b, v4.16b +; CHECK-GI-NEXT: ushll2 v3.2d, v1.4s, #0 +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b ; CHECK-GI-NEXT: ret entry: %c = zext <8 x i32> %a to <8 x i64> @@ -877,13 +866,12 @@ ; ; CHECK-GI-LABEL: zext_v8i10_v8i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: adrp x8, .LCPI44_0 -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI44_0] -; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI44_0] +; CHECK-GI-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: and v1.16b, v2.16b, v3.16b ; CHECK-GI-NEXT: ret entry: %c = zext <8 x i10> %a to <8 x i32> @@ -904,21 +892,18 @@ ; ; CHECK-GI-LABEL: zext_v8i10_v8i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 ; CHECK-GI-NEXT: adrp x8, .LCPI45_0 -; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI45_0] -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: and v0.16b, v0.16b, v4.16b -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: ushll v5.2d, v1.2s, #0 -; CHECK-GI-NEXT: and v1.16b, v2.16b, v4.16b -; CHECK-GI-NEXT: and v2.16b, v5.16b, v4.16b -; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0 -; CHECK-GI-NEXT: and v3.16b, v3.16b, v4.16b +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI45_0] +; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: ushll v4.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll2 v5.2d, v0.4s, #0 +; CHECK-GI-NEXT: and v0.16b, v2.16b, v3.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: and v2.16b, v4.16b, v3.16b +; CHECK-GI-NEXT: and v3.16b, v5.16b, v3.16b ; CHECK-GI-NEXT: ret entry: %c = zext <8 x i10> %a to <8 x i64> @@ -934,9 +919,9 @@ ; ; CHECK-GI-LABEL: zext_v16i8_v16i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: ret entry: %c = zext <16 x i8> %a to <16 x i16> @@ -956,15 +941,12 @@ ; ; CHECK-GI-LABEL: zext_v16i8_v16i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: ushll v2.8h, v1.8b, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov d3, v2.d[1] -; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0 +; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll2 v3.8h, v0.16b, #0 +; CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0 ; CHECK-GI-NEXT: ret entry: %c = zext <16 x i8> %a to <16 x i32> @@ -992,27 +974,20 @@ ; ; CHECK-GI-LABEL: zext_v16i8_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: ushll v4.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: mov d5, v4.d[1] -; CHECK-GI-NEXT: ushll v4.2d, v4.2s, #0 -; CHECK-GI-NEXT: ushll v6.4s, v3.4h, #0 -; CHECK-GI-NEXT: mov d3, v2.d[1] -; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-GI-NEXT: mov d7, v6.d[1] -; CHECK-GI-NEXT: ushll v5.2d, v5.2s, #0 -; CHECK-GI-NEXT: ushll v6.2d, v6.2s, #0 -; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0 -; CHECK-GI-NEXT: ushll v7.2d, v7.2s, #0 +; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll2 v3.4s, v1.8h, #0 +; CHECK-GI-NEXT: ushll v5.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v7.4s, v0.8h, #0 +; CHECK-GI-NEXT: ushll v0.2d, v2.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v2.4s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ushll v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: ushll2 v5.2d, v5.4s, #0 +; CHECK-GI-NEXT: ushll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: ushll2 v7.2d, v7.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = zext <16 x i8> %a to <16 x i64> @@ -1032,13 +1007,12 @@ ; ; CHECK-GI-LABEL: zext_v16i16_v16i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v4.4s, v2.4h, #0 +; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v5.4s, v0.8h, #0 ; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0 -; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-GI-NEXT: mov v1.16b, v4.16b +; CHECK-GI-NEXT: ushll2 v3.4s, v1.8h, #0 +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b ; CHECK-GI-NEXT: ret entry: %c = zext <16 x i16> %a to <16 x i32> @@ -1064,24 +1038,18 @@ ; ; CHECK-GI-LABEL: zext_v16i16_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: ushll v6.4s, v3.4h, #0 -; CHECK-GI-NEXT: mov d3, v0.d[1] -; CHECK-GI-NEXT: mov d7, v1.d[1] -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-GI-NEXT: ushll v4.2d, v1.2s, #0 -; CHECK-GI-NEXT: mov d5, v2.d[1] -; CHECK-GI-NEXT: mov d16, v6.d[1] -; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-GI-NEXT: ushll v1.2d, v3.2s, #0 -; CHECK-GI-NEXT: ushll v6.2d, v6.2s, #0 -; CHECK-GI-NEXT: ushll v3.2d, v5.2s, #0 -; CHECK-GI-NEXT: ushll v5.2d, v7.2s, #0 -; CHECK-GI-NEXT: ushll v7.2d, v16.2s, #0 +; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll2 v7.4s, v1.8h, #0 +; CHECK-GI-NEXT: ushll v0.2d, v2.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v2.4s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ushll v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: ushll2 v5.2d, v5.4s, #0 +; CHECK-GI-NEXT: ushll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: ushll2 v7.2d, v7.4s, #0 ; CHECK-GI-NEXT: ret entry: %c = zext <16 x i16> %a to <16 x i64> @@ -1106,20 +1074,18 @@ ; ; CHECK-GI-LABEL: zext_v16i32_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d6, v1.d[1] -; CHECK-GI-NEXT: mov d5, v0.d[1] -; CHECK-GI-NEXT: mov d7, v2.d[1] -; CHECK-GI-NEXT: mov d18, v3.d[1] -; CHECK-GI-NEXT: ushll v16.2d, v1.2s, #0 -; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v16.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll2 v17.2d, v0.4s, #0 +; CHECK-GI-NEXT: ushll v18.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll2 v19.2d, v1.4s, #0 ; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0 -; CHECK-GI-NEXT: ushll v17.2d, v6.2s, #0 -; CHECK-GI-NEXT: ushll v1.2d, v5.2s, #0 +; CHECK-GI-NEXT: ushll2 v5.2d, v2.4s, #0 ; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0 -; CHECK-GI-NEXT: ushll v5.2d, v7.2s, #0 -; CHECK-GI-NEXT: ushll v7.2d, v18.2s, #0 -; CHECK-GI-NEXT: mov v2.16b, v16.16b -; CHECK-GI-NEXT: mov v3.16b, v17.16b +; CHECK-GI-NEXT: ushll2 v7.2d, v3.4s, #0 +; CHECK-GI-NEXT: mov v0.16b, v16.16b +; CHECK-GI-NEXT: mov v1.16b, v17.16b +; CHECK-GI-NEXT: mov v2.16b, v18.16b +; CHECK-GI-NEXT: mov v3.16b, v19.16b ; CHECK-GI-NEXT: ret entry: %c = zext <16 x i32> %a to <16 x i64>