NOTE(review): this patch was recovered from a whitespace-collapsed copy and has been
re-flowed so that every hunk matches the old/new line counts in its hunk header.
Angle-bracket spans appear to have been stripped by the extraction: each shufflevector
context line ends at the mask type with no mask operand, and the VREV16D class header
has no opening '<' for its parameter list. Those context lines are reproduced as found
and presumably will not match the target files - TODO restore them from the real
sources before applying. Leading indentation, intra-line spacing and the position of
blank context lines are reconstructed - verify against the target files.

Index: llvm/lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrNEON.td
+++ llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -6797,9 +6797,12 @@
 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
-def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
-def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
+  def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
+            (VREV64q32 QPR:$Vm)>;
+  def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))),
+            (VREV64q16 QPR:$Vm)>;
+  def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
+            (VREV64d16 DPR:$Vm)>;
 }
 
 // VREV32 : Vector Reverse elements within 32-bit words
@@ -6821,6 +6824,13 @@
 def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
 def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
 
+let Predicates = [HasNEON] in {
+  def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
+            (VREV32q16 QPR:$Vm)>;
+  def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
+            (VREV32d16 DPR:$Vm)>;
+}
+
 // VREV16 : Vector Reverse elements within 16-bit halfwords
 
 class VREV16D op19_18, string OpcodeStr, string Dt, ValueType Ty>
Index: llvm/test/CodeGen/ARM/vrev.ll
===================================================================
--- llvm/test/CodeGen/ARM/vrev.ll
+++ llvm/test/CodeGen/ARM/vrev.ll
@@ -26,6 +26,12 @@
 }
 
 define <4 x half> @test_vrev64Df16(<4 x half>* %A) nounwind {
+; CHECK-LABEL: test_vrev64Df16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vrev64.16 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <4 x half>, <4 x half>* %A
   %tmp2 = shufflevector <4 x half> %tmp1, <4 x half> undef, <4 x i32>
   ret <4 x half> %tmp2
@@ -82,6 +88,13 @@
 }
 
 define <8 x half> @test_vrev64Qf16(<8 x half>* %A) nounwind {
+; CHECK-LABEL: test_vrev64Qf16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vrev64.16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <8 x half>, <8 x half>* %A
   %tmp2 = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32>
   ret <8 x half> %tmp2
@@ -138,6 +151,12 @@
 }
 
 define <4 x half> @test_vrev32Df16(<4 x half>* %A) nounwind {
+; CHECK-LABEL: test_vrev32Df16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vrev32.16 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <4 x half>, <4 x half>* %A
   %tmp2 = shufflevector <4 x half> %tmp1, <4 x half> undef, <4 x i32>
   ret <4 x half> %tmp2
@@ -170,6 +189,13 @@
 }
 
 define <8 x half> @test_vrev32Qf16(<8 x half>* %A) nounwind {
+; CHECK-LABEL: test_vrev32Qf16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vrev32.16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <8 x half>, <8 x half>* %A
   %tmp2 = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32>
   ret <8 x half> %tmp2
@@ -228,6 +254,13 @@
 }
 
 define <8 x half> @test_vrev32Qf16_undef(<8 x half>* %A) nounwind {
+; CHECK-LABEL: test_vrev32Qf16_undef:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vrev32.16 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: mov pc, lr
   %tmp1 = load <8 x half>, <8 x half>* %A
   %tmp2 = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32>
   ret <8 x half> %tmp2