Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -6589,6 +6589,8 @@
 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
 def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+def : Pat<(v8f16 (NEONvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
+def : Pat<(v4f16 (NEONvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
 
 // VREV32 : Vector Reverse elements within 32-bit words
 
Index: test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
===================================================================
--- test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -1213,20 +1213,18 @@
   ret <8 x half> %vext
 }
 
-; FIXME (PR38404)
-;
-;define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
-;entry:
-;  %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-;  ret <4 x half> %shuffle.i
-;}
-;
-;define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
-;entry:
-;  %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-;  ret <8 x half> %shuffle.i
-;}
+define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
+entry:
+  %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x half> %shuffle.i
+}
+define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
+entry:
+  %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x half> %shuffle.i
+}
+
 
 declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
 declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>)