diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5316,6 +5316,19 @@
                                  (v2i32 (trunc (v2i64 V128:$Vm))))),
           (UZP1v4i32 V128:$Vn, V128:$Vm)>;
 
+def : Pat<(v16i8 (concat_vectors
+                  (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
+                  (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
+          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
+def : Pat<(v8i16 (concat_vectors
+                  (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
+                  (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
+          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
+def : Pat<(v4i32 (concat_vectors
+                  (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
+                  (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
+          (UZP2v4i32 V128:$Vn, V128:$Vm)>;
+
 //----------------------------------------------------------------------------
 // AdvSIMD TBL/TBX instructions
 //----------------------------------------------------------------------------
diff --git a/llvm/test/CodeGen/AArch64/arm64-uzp2-combine.ll b/llvm/test/CodeGen/AArch64/arm64-uzp2-combine.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-uzp2-combine.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
+
+; Test the (concat_vectors (trunc (lshr)), (trunc (lshr))) pattern.
+
+define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: test_combine_v8i16_to_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp2 v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+entry:
+  %lshr1 = lshr <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %trunc1 = trunc <8 x i16> %lshr1 to <8 x i8>
+  %lshr2 = lshr <8 x i16> %y, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %trunc2 = trunc <8 x i16> %lshr2 to <8 x i8>
+  %shuffle = shufflevector <8 x i8> %trunc1, <8 x i8> %trunc2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %shuffle
+}
+
+define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_combine_v4i32_to_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+entry:
+  %lshr1 = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
+  %trunc1 = trunc <4 x i32> %lshr1 to <4 x i16>
+  %lshr2 = lshr <4 x i32> %y, <i32 16, i32 16, i32 16, i32 16>
+  %trunc2 = trunc <4 x i32> %lshr2 to <4 x i16>
+  %shuffle = shufflevector <4 x i16> %trunc1, <4 x i16> %trunc2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle
+}
+
+define <4 x i32> @test_combine_v2i64_to_v4i32(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: test_combine_v2i64_to_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %lshr1 = lshr <2 x i64> %x, <i64 32, i64 32>
+  %trunc1 = trunc <2 x i64> %lshr1 to <2 x i32>
+  %lshr2 = lshr <2 x i64> %y, <i64 32, i64 32>
+  %trunc2 = trunc <2 x i64> %lshr2 to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %trunc1, <2 x i32> %trunc2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %shuffle
+}
+