Index: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3030,11 +3030,13 @@ switch (VT.getSimpleVT().SimpleTy) { default: return; case MVT::v8i8: Opc = ARM::VZIPd8; break; + case MVT::v4f16: case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VZIPq8; break; + case MVT::v8f16: case MVT::v8i16: Opc = ARM::VZIPq16; break; case MVT::v4f32: case MVT::v4i32: Opc = ARM::VZIPq32; break; @@ -3051,11 +3053,13 @@ switch (VT.getSimpleVT().SimpleTy) { default: return; case MVT::v8i8: Opc = ARM::VUZPd8; break; + case MVT::v4f16: case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VUZPq8; break; + case MVT::v8f16: case MVT::v8i16: Opc = ARM::VUZPq16; break; case MVT::v4f32: case MVT::v4i32: Opc = ARM::VUZPq32; break; Index: llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll +++ llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll @@ -1015,44 +1015,56 @@ ret <8 x half> %3 } +define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: test_vzip_f16: +; CHECK: vzip.16 d0, d1 +; CHECK-NEXT: bx lr +entry: + %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1 + ret 
%struct.float16x4x2_t %.fca.0.1.insert +} + +define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: test_vzipq_f16: +; CHECK: vzip.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1 + ret %struct.float16x8x2_t %.fca.0.1.insert +} + +define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: test_vuzp_f16: +; CHECK: vuzp.16 d0, d1 +; CHECK-NEXT: bx lr +entry: + %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1 + ret %struct.float16x4x2_t %.fca.0.1.insert +} + +define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: test_vuzpq_f16: +; CHECK: vuzp.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1 + ret %struct.float16x8x2_t %.fca.0.1.insert +} + ; FIXME (PR38404) ; -;define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) { -;entry: -; %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> -; %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> -; %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> 
%vzip.i, 0, 0 -; %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1 -; ret %struct.float16x4x2_t %.fca.0.1.insert -;} -; -;define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) { -;entry: -; %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> -; %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> -; %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0 -; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1 -; ret %struct.float16x8x2_t %.fca.0.1.insert -;} -; -;define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) { -;entry: -; %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0 -; %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1 -; ret %struct.float16x4x2_t %.fca.0.1.insert -;} -; -;define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) { -;entry: -; %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> -; %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> -; %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0 -; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1 -; ret %struct.float16x8x2_t %.fca.0.1.insert -;} -; ;define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) { ;entry: ; %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32>