Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -6564,6 +6564,7 @@
 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
 def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+def : Pat<(v8f16 (NEONvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
 
 // VREV32 : Vector Reverse elements within 32-bit words
 
@@ -6682,6 +6683,12 @@
                            (v4i32 QPR:$Vm),
                            (i32 imm:$index))),
           (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
+def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn),
+                           (v8f16 QPR:$Vm),
+                           (i32 imm:$index))),
+          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
 // VTRN : Vector Transpose
 
 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
Index: test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
===================================================================
--- test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -1141,11 +1141,12 @@
 ;  ret <4 x half> %vext
 ;}
 ;
-;define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
-;entry:
-;  %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
-;  ret <8 x half> %vext
-;}
+define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
+entry:
+  %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+  ret <8 x half> %vext
+}
+; FIXME (PR38404)
 ;
 ;define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
 ;entry:
@@ -1153,11 +1154,11 @@
 ;  %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ;  ret <4 x half> %shuffle.i
 ;}
 ;
-;define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
-;entry:
-;  %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-;  ret <8 x half> %shuffle.i
-;}
+define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
+entry:
+  %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x half> %shuffle.i
+}
 declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
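
The new patterns can be exercised directly with llc, outside the lit test. The sketch below is illustrative only: the file name, function names, triple, and feature flags are not taken from this patch, and the test's own RUN line remains authoritative. The idea is that a shuffle mask reversing the 16-bit lanes within each 64-bit chunk should now select vrev64.16, and a sliding-window mask over the concatenation of the two inputs should select vext.16 with the matching immediate:

; vrev-vext-f16.ll (hypothetical file; invocation is a guess at suitable flags)
;   llc -mtriple=armv8a-none-none-eabi -mattr=+fullfp16,+neon vrev-vext-f16.ll -o -

; Lanes 3,2,1,0 then 7,6,5,4: a reversal within each 64-bit doubleword,
; which the new NEONvrev64 pattern should select as "vrev64.16 q0, q0".
define <8 x half> @rev64q(<8 x half> %a) {
  %r = shufflevector <8 x half> %a, <8 x half> undef,
                     <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x half> %r
}

; An 8-lane window starting at lane 3 of the concatenation a:b, which the
; new NEONvext pattern should select as "vext.16 q0, q0, q1, #3"; any start
; lane from 1 to 7 exercises the same pattern with a different immediate.
define <8 x half> @extq(<8 x half> %a, <8 x half> %b) {
  %r = shufflevector <8 x half> %a, <8 x half> %b,
                     <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
  ret <8 x half> %r
}

The d-register (<4 x half>) variants stay commented out in the test, per the FIXME referencing PR38404.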