[ARM] f16 vector support: OpcodeIndex mapping and v8f16 concat_vectors

* lib/Target/ARM/ARMISelDAGToDAG.cpp: v4f16/v8f16 now share OpcodeIndex 1
  with v4i16/v8i16.
* lib/Target/ARM/ARMInstrNEON.td: add a v8f16 concat_vectors pattern that
  expands to the same REG_SEQUENCE as the existing v4f32 one.
* test/CodeGen/ARM/armv8.2a-fp16.ll: new codegen test covering both changes.

NOTE(review): the shufflevector mask in test_shufflevector8xhalf was missing
from the copy of the patch this was restored from; the <0..7> mask below is
reconstructed from the expected "vorr d17, d16, d16" output. Leading
whitespace on context lines was also reconstructed. Confirm both against the
original before committing.

Index: lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2212,7 +2212,10 @@
   case MVT::v8i8:
   case MVT::v16i8: OpcodeIndex = 0; break;
   case MVT::v4i16:
-  case MVT::v8i16: OpcodeIndex = 1; break;
+  case MVT::v8i16:
+  case MVT::v4f16:
+  case MVT::v8f16:
+                  OpcodeIndex = 1; break;
   case MVT::v2f32:
   case MVT::v2i32:
   case MVT::v4f32:
Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -7576,6 +7576,8 @@
           (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
 def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
           (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
 
 //===----------------------------------------------------------------------===//
 // Assembler aliases
Index: test/CodeGen/ARM/armv8.2a-fp16.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/armv8.2a-fp16.ll
@@ -0,0 +1,46 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "armv8.2a-unknown-linux-gnu"
+
+define <4 x half> @test_vld_dup1_4xhalf(half* %b) #0 {
+; CHECK-LABEL: test_vld_dup1_4xhalf:
+; CHECK:       vld1.16 {d16[]}, [r0:16]
+; CHECK-NEXT:  vmov r0, r1, d16
+; CHECK-NEXT:  bx lr
+
+entry:
+  %b1 = load half, half* %b, align 2
+  %vecinit = insertelement <4 x half> undef, half %b1, i32 0
+  %vecinit2 = insertelement <4 x half> %vecinit, half %b1, i32 1
+  %vecinit3 = insertelement <4 x half> %vecinit2, half %b1, i32 2
+  %vecinit4 = insertelement <4 x half> %vecinit3, half %b1, i32 3
+  ret <4 x half> %vecinit4
+}
+
+define <8 x half> @test_vld_dup1_8xhalf(half* %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vld_dup1_8xhalf:
+; CHECK:       vld1.16 {d16[], d17[]}, [r0:16]
+; CHECK-NEXT:  vmov r0, r1, d16
+; CHECK-NEXT:  vmov r2, r3, d17
+; CHECK-NEXT:  bx lr
+
+entry:
+  %b1 = load half, half* %b, align 2
+  %vecinit = insertelement <8 x half> undef, half %b1, i32 0
+  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
+  ret <8 x half> %vecinit8
+}
+
+define <8 x half> @test_shufflevector8xhalf(<4 x half> %a) #0 {
+; CHECK-LABEL: test_shufflevector8xhalf:
+; CHECK:       vmov d16, r0, r1
+; CHECK-NEXT:  vorr d17, d16, d16
+; CHECK-NEXT:  vmov r0, r1, d16
+; CHECK-NEXT:  vmov r2, r3, d17
+; CHECK-NEXT:  bx lr
+
+entry:
+  %r = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x half> %r
+}
+
+attributes #0 = { "target-features"="+armv8.2-a,+fullfp16,+neon,-thumb-mode" }