Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -6678,13 +6678,14 @@
   let Inst{10-9} = index{1-0};
   let Inst{8} = 0b0;
 }
+def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
+          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
   let Inst{10} = index{0};
   let Inst{9-8} = 0b00;
 }
-def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
-                           (v2f32 DPR:$Vm),
-                           (i32 imm:$index))),
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
           (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
 
 def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
@@ -6694,6 +6695,9 @@
   let Inst{11-9} = index{2-0};
   let Inst{8} = 0b0;
 }
+def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
+          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
   let Inst{11-10} = index{1-0};
   let Inst{9-8} = 0b00;
@@ -6702,9 +6706,7 @@
   let Inst{11} = index{0};
   let Inst{10-8} = 0b000;
 }
-def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
-                           (v4f32 QPR:$Vm),
-                           (i32 imm:$index))),
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
           (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
 
 // VTRN : Vector Transpose
Index: test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
===================================================================
--- test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -1191,20 +1191,26 @@
   ret <8 x half> %shuffle
 }
 
+define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vext_f16:
+; CHECK: vext.16 d0, d0, d1, #2
+; CHECK-NEXT: bx lr
+entry:
+  %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  ret <4 x half> %vext
+}
+
+define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vextq_f16:
+; CHECK: vext.16 q0, q0, q1, #5
+; CHECK-NEXT: bx lr
+entry:
+  %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+  ret <8 x half> %vext
+}
+
 ; FIXME (PR38404)
 ;
-;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
-;entry:
-;  %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-;  ret <4 x half> %vext
-;}
-;
-;define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
-;entry:
-;  %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
-;  ret <8 x half> %vext
-;}
-;
 ;define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
 ;entry:
 ;  %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>