Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -6694,6 +6694,7 @@
 def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
 
 // VEXT : Vector Extract
@@ -7243,6 +7244,7 @@
   def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
   def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
   def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
 
   // 128 bit conversions
   def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
@@ -7276,6 +7278,9 @@
   def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
 }
 
+def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
+
 // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
           (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
Index: test/CodeGen/ARM/fp16-vector-basic-instructions.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fp16-vector-basic-instructions.ll
@@ -0,0 +1,383 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - -mtriple=arm-eabi -mattr=+v8.2a,+fullfp16,+neon -float-abi=hard %s | FileCheck %s --check-prefix=HARD
+; RUN: llc -o - -mtriple=armeb-eabi -mattr=+v8.2a,+fullfp16,+neon -float-abi=hard %s | FileCheck %s --check-prefix=HARDBE
+; RUN: llc -o - -mtriple=arm-eabi -mattr=+v8.2a,+fullfp16,+neon %s | FileCheck %s --check-prefix=SOFT
+; RUN: llc -o - -mtriple=armeb-eabi -mattr=+v8.2a,+fullfp16,+neon %s | FileCheck %s --check-prefix=SOFTFPBE
+
+declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+
+; TODO 1: (vmov one_use(vdup)) can be merged to a single vmov
+define i16 @bitcast_8xhalf_8xi16(<8 x half> %c) {
+; HARD-LABEL: bitcast_8xhalf_8xi16:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vdup.16 q8, d1[3]
+; HARD-NEXT: vmov.u16 r0, d16[0]
+; HARD-NEXT: bx lr
+;
+; HARDBE-LABEL: bitcast_8xhalf_8xi16:
+; HARDBE: @ %bb.0: @ %entry
+; HARDBE-NEXT: vrev64.16 q8, q0
+; HARDBE-NEXT: vdup.16 q8, d17[3]
+; HARDBE-NEXT: vmov.u16 r0, d16[0]
+; HARDBE-NEXT: bx lr
+;
+; SOFT-LABEL: bitcast_8xhalf_8xi16:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d17, r2, r3
+; SOFT-NEXT: vdup.16 q8, d17[3]
+; SOFT-NEXT: vmov.u16 r0, d16[0]
+; SOFT-NEXT: bx lr
+;
+; SOFTFPBE-LABEL: bitcast_8xhalf_8xi16:
+; SOFTFPBE: @ %bb.0: @ %entry
+; SOFTFPBE-NEXT: vmov d17, r3, r2
+; SOFTFPBE-NEXT: vrev64.16 q8, q8
+; SOFTFPBE-NEXT: vdup.16 q8, d17[3]
+; SOFTFPBE-NEXT: vmov.u16 r0, d16[0]
+; SOFTFPBE-NEXT: bx lr
+; TODO 2: After TODO 1: (vmov one_use(vrev(V)), index) can be merged to a single vmov(V, index2)
+; TODO 3: After TODO 2: (vmov r0, one_use(vmov d17, r3, r2)[#N]) can be merged to a single bit-extract from r2 (or r3)
+entry:
+  %shuffle = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  %0 = bitcast <8 x half> %shuffle to <8 x i16>
+  %vget_lane = extractelement <8 x i16> %0, i32 0
+  ret i16 %vget_lane
+}
+
+define i16 @bitcast_4xhalf_4xi16(<4 x half> %c) {
+; HARD-LABEL: bitcast_4xhalf_4xi16:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vdup.32 d16, d0[3]
+; HARD-NEXT: vmov.u16 r0, d16[0]
+; HARD-NEXT: bx lr
+;
+; HARDBE-LABEL: bitcast_4xhalf_4xi16:
+; HARDBE: @ %bb.0: @ %entry
+; HARDBE-NEXT: vrev64.16 d16, d0
+; HARDBE-NEXT: vdup.32 d16, d16[3]
+; HARDBE-NEXT: vmov.u16 r0, d16[0]
+; HARDBE-NEXT: bx lr
+;
+; SOFT-LABEL: bitcast_4xhalf_4xi16:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d16, r0, r1
+; SOFT-NEXT: vdup.32 d16, d16[3]
+; SOFT-NEXT: vmov.u16 r0, d16[0]
+; SOFT-NEXT: bx lr
+;
+; SOFTFPBE-LABEL: bitcast_4xhalf_4xi16:
+; SOFTFPBE: @ %bb.0: @ %entry
+; SOFTFPBE-NEXT: vmov d16, r1, r0
+; SOFTFPBE-NEXT: vrev64.16 d16, d16
+; SOFTFPBE-NEXT: vdup.32 d16, d16[3]
+; SOFTFPBE-NEXT: vmov.u16 r0, d16[0]
+; SOFTFPBE-NEXT: bx lr
+entry:
+  %shuffle = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %0 = bitcast <4 x half> %shuffle to <4 x i16>
+  %vget_lane = extractelement <4 x i16> %0, i32 0
+  ret i16 %vget_lane
+}
+
+define <4 x half> @bitcast_8xi16_8xhalf(<8 x i16> %c) {
+; HARD-LABEL: bitcast_8xi16_8xhalf:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vorr d16, d0, d0
+; HARD-NEXT: vtrn.16 d0, d16
+; HARD-NEXT: vext.16 d0, d16, d16, #1
+; HARD-NEXT: bx lr
+;
+; HARDBE-LABEL: bitcast_8xi16_8xhalf:
+; HARDBE: @ %bb.0: @ %entry
+; HARDBE-NEXT: vrev64.16 q8, q0
+; HARDBE-NEXT: vorr d18, d16, d16
+; HARDBE-NEXT: vtrn.16 d16, d18
+; HARDBE-NEXT: vext.16 d16, d18, d18, #1
+; HARDBE-NEXT: vrev64.16 d0, d16
+; HARDBE-NEXT: bx lr
+;
+; SOFT-LABEL: bitcast_8xi16_8xhalf:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d16, r0, r1
+; SOFT-NEXT: vorr d18, d16, d16
+; SOFT-NEXT: vtrn.16 d16, d18
+; SOFT-NEXT: vext.16 d16, d18, d18, #1
+; SOFT-NEXT: vmov r0, r1, d16
+; SOFT-NEXT: bx lr
+;
+; SOFTFPBE-LABEL: bitcast_8xi16_8xhalf:
+; SOFTFPBE: @ %bb.0: @ %entry
+; SOFTFPBE-NEXT: vmov d16, r1, r0
+; SOFTFPBE-NEXT: vrev64.16 q8, q8
+; SOFTFPBE-NEXT: vorr d18, d16, d16
+; SOFTFPBE-NEXT: vtrn.16 d16, d18
+; SOFTFPBE-NEXT: vext.16 d16, d18, d18, #1
+; SOFTFPBE-NEXT: vrev64.16 d16, d16
+; SOFTFPBE-NEXT: vmov r1, r0, d16
+; SOFTFPBE-NEXT: bx lr
+entry:
+  %0 = bitcast <8 x i16> %c to <8 x half>
+  %shuffle = shufflevector <8 x half> %0, <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 3, i32 1>
+  ret <4 x half> %shuffle
+}
+
+define <4 x i16> @extract_first_half(<8 x half> %c) {
+; HARD-LABEL: extract_first_half:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vorr d16, d0, d0
+; HARD-NEXT: vtrn.16 d0, d16
+; HARD-NEXT: vext.16 d0, d16, d16, #1
+; HARD-NEXT: bx lr
+;
+; HARDBE-LABEL: extract_first_half:
+; HARDBE: @ %bb.0: @ %entry
+; HARDBE-NEXT: vrev64.16 q8, q0
+; HARDBE-NEXT: vorr d18, d16, d16
+; HARDBE-NEXT: vtrn.16 d16, d18
+; HARDBE-NEXT: vext.16 d16, d18, d18, #1
+; HARDBE-NEXT: vrev64.16 d0, d16
+; HARDBE-NEXT: bx lr
+;
+; SOFT-LABEL: extract_first_half:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d16, r0, r1
+; SOFT-NEXT: vorr d18, d16, d16
+; SOFT-NEXT: vtrn.16 d16, d18
+; SOFT-NEXT: vext.16 d16, d18, d18, #1
+; SOFT-NEXT: vmov r0, r1, d16
+; SOFT-NEXT: bx lr
+;
+; SOFTFPBE-LABEL: extract_first_half:
+; SOFTFPBE: @ %bb.0: @ %entry
+; SOFTFPBE-NEXT: vmov d16, r1, r0
+; SOFTFPBE-NEXT: vrev64.16 q8, q8
+; SOFTFPBE-NEXT: vorr d18, d16, d16
+; SOFTFPBE-NEXT: vtrn.16 d16, d18
+; SOFTFPBE-NEXT: vext.16 d16, d18, d18, #1
+; SOFTFPBE-NEXT: vrev64.16 d16, d16
+; SOFTFPBE-NEXT: vmov r1, r0, d16
+; SOFTFPBE-NEXT: bx lr
+entry:
+  %0 = bitcast <8 x half> %c to <8 x i16>
+  %shuffle = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 3, i32 1>
+  ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @extract_second_half(<8 x half> %c) {
+; HARD-LABEL: extract_second_half:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vorr d16, d1, d1
+; HARD-NEXT: vorr d17, d1, d1
+; HARD-NEXT: vtrn.16 d16, d17
+; HARD-NEXT: vext.16 d0, d1, d17, #3
+; HARD-NEXT: bx lr
+;
+; HARDBE-LABEL: extract_second_half:
+; HARDBE: @ %bb.0: @ %entry
+; HARDBE-NEXT: vrev64.16 q8, q0
+; HARDBE-NEXT: vorr d18, d17, d17
+; HARDBE-NEXT: vorr d19, d17, d17
+; HARDBE-NEXT: vtrn.16 d18, d19
+; HARDBE-NEXT: vext.16 d16, d17, d19, #3
+; HARDBE-NEXT: vrev64.16 d0, d16
+; HARDBE-NEXT: bx lr
+;
+; SOFT-LABEL: extract_second_half:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d17, r2, r3
+; SOFT-NEXT: vorr d18, d17, d17
+; SOFT-NEXT: vorr d19, d17, d17
+; SOFT-NEXT: vtrn.16 d18, d19
+; SOFT-NEXT: vext.16 d16, d17, d19, #3
+; SOFT-NEXT: vmov r0, r1, d16
+; SOFT-NEXT: bx lr
+;
+; SOFTFPBE-LABEL: extract_second_half:
+; SOFTFPBE: @ %bb.0: @ %entry
+; SOFTFPBE-NEXT: vmov d17, r3, r2
+; SOFTFPBE-NEXT: vrev64.16 q8, q8
+; SOFTFPBE-NEXT: vorr d18, d17, d17
+; SOFTFPBE-NEXT: vorr d19, d17, d17
+; SOFTFPBE-NEXT: vtrn.16 d18, d19
+; SOFTFPBE-NEXT: vext.16 d16, d17, d19, #3
+; SOFTFPBE-NEXT: vrev64.16 d16, d16
+; SOFTFPBE-NEXT: vmov r1, r0, d16
+; SOFTFPBE-NEXT: bx lr
+entry:
+  %0 = bitcast <8 x half> %c to <8 x i16>
+  %shuffle = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> <i32 7, i32 5, i32 5, i32 7>
+  ret <4 x i16> %shuffle
+}
+
+define <2 x i16> @extract_quarter1(<8 x half> %c) {
+; HARD-LABEL: extract_quarter1:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vmov.u16 r0, d0[1]
+; HARD-NEXT: vmov.u16 r1, d0[0]
+; HARD-NEXT: vmov.32 d0[0], r0
+; HARD-NEXT: vmov.32 d0[1], r1
+; HARD-NEXT: bx lr
+;
+; HARDBE-LABEL: extract_quarter1:
+; HARDBE: @ %bb.0: @ %entry
+; HARDBE-NEXT: vrev64.16 q8, q0
+; HARDBE-NEXT: vmov.u16 r0, d16[1]
+; HARDBE-NEXT: vmov.u16 r1, d16[0]
+; HARDBE-NEXT: vmov.32 d16[0], r0
+; HARDBE-NEXT: vmov.32 d16[1], r1
+; HARDBE-NEXT: vrev64.32 d0, d16
+; HARDBE-NEXT: bx lr
+;
+; SOFT-LABEL: extract_quarter1:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d16, r0, r1
+; SOFT-NEXT: vmov.u16 r0, d16[1]
+; SOFT-NEXT: vmov.u16 r1, d16[0]
+; SOFT-NEXT: bx lr
+;
+; SOFTFPBE-LABEL: extract_quarter1:
+; SOFTFPBE: @ %bb.0: @ %entry
+; SOFTFPBE-NEXT: vmov d16, r1, r0
+; SOFTFPBE-NEXT: vrev64.16 q8, q8
+; SOFTFPBE-NEXT: vmov.u16 r0, d16[1]
+; SOFTFPBE-NEXT: vmov.u16 r1, d16[0]
+; SOFTFPBE-NEXT: vmov.32 d16[0], r0
+; SOFTFPBE-NEXT: vmov.32 d16[1], r1
+; SOFTFPBE-NEXT: vrev64.32 d16, d16
+; SOFTFPBE-NEXT: vmov r1, r0, d16
+; SOFTFPBE-NEXT: bx lr
+entry:
+  %0 = bitcast <8 x half> %c to <8 x i16>
+  %shuffle = shufflevector <8 x i16> %0, <8 x i16> undef, <2 x i32> <i32 1, i32 0>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_quarter2(<8 x half> %c) {
+; HARD-LABEL: extract_quarter2:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vmov.u16 r0, d0[2]
+; HARD-NEXT: vmov.u16 r1, d0[3]
+; HARD-NEXT: vmov.32 d0[0], r0
+; HARD-NEXT: vmov.32 d0[1], r1
+; HARD-NEXT: bx lr
+;
+; HARDBE-LABEL: extract_quarter2:
+; HARDBE: @ %bb.0: @ %entry
+; HARDBE-NEXT: vrev64.16 q8, q0
+; HARDBE-NEXT: vmov.u16 r0, d16[2]
+; HARDBE-NEXT: vmov.u16 r1, d16[3]
+; HARDBE-NEXT: vmov.32 d16[0], r0
+; HARDBE-NEXT: vmov.32 d16[1], r1
+; HARDBE-NEXT: vrev64.32 d0, d16
+; HARDBE-NEXT: bx lr
+;
+; SOFT-LABEL: extract_quarter2:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d16, r0, r1
+; SOFT-NEXT: vmov.u16 r0, d16[2]
+; SOFT-NEXT: vmov.u16 r1, d16[3]
+; SOFT-NEXT: bx lr
+;
+; SOFTFPBE-LABEL: extract_quarter2:
+; SOFTFPBE: @ %bb.0: @ %entry
+; SOFTFPBE-NEXT: vmov d16, r1, r0
+; SOFTFPBE-NEXT: vrev64.16 q8, q8
+; SOFTFPBE-NEXT: vmov.u16 r0, d16[2]
+; SOFTFPBE-NEXT: vmov.u16 r1, d16[3]
+; SOFTFPBE-NEXT: vmov.32 d16[0], r0
+; SOFTFPBE-NEXT: vmov.32 d16[1], r1
+; SOFTFPBE-NEXT: vrev64.32 d16, d16
+; SOFTFPBE-NEXT: vmov r1, r0, d16
+; SOFTFPBE-NEXT: bx lr
+entry:
+  %0 = bitcast <8 x half> %c to <8 x i16>
+  %shuffle = shufflevector <8 x i16> %0, <8 x i16> undef, <2 x i32> <i32 2, i32 3>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_quarter3(<8 x half> %c) {
+; HARD-LABEL: extract_quarter3:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vmov.u16 r0, d1[0]
+; HARD-NEXT: vmov.u16 r1, d1[1]
+; HARD-NEXT: vmov.32 d0[0], r0
+; HARD-NEXT: vmov.32 d0[1], r1
+; HARD-NEXT: bx lr
+;
+; HARDBE-LABEL: extract_quarter3:
+; HARDBE: @ %bb.0: @ %entry
+; HARDBE-NEXT: vrev64.16 q8, q0
+; HARDBE-NEXT: vmov.u16 r0, d17[0]
+; HARDBE-NEXT: vmov.u16 r1, d17[1]
+; HARDBE-NEXT: vmov.32 d16[0], r0
+; HARDBE-NEXT: vmov.32 d16[1], r1
+; HARDBE-NEXT: vrev64.32 d0, d16
+; HARDBE-NEXT: bx lr
+;
+; SOFT-LABEL: extract_quarter3:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d17, r2, r3
+; SOFT-NEXT: vmov.u16 r0, d17[0]
+; SOFT-NEXT: vmov.u16 r1, d17[1]
+; SOFT-NEXT: bx lr
+;
+; SOFTFPBE-LABEL: extract_quarter3:
+; SOFTFPBE: @ %bb.0: @ %entry
+; SOFTFPBE-NEXT: vmov d17, r3, r2
+; SOFTFPBE-NEXT: vrev64.16 q8, q8
+; SOFTFPBE-NEXT: vmov.u16 r0, d17[0]
+; SOFTFPBE-NEXT: vmov.u16 r1, d17[1]
+; SOFTFPBE-NEXT: vmov.32 d16[0], r0
+; SOFTFPBE-NEXT: vmov.32 d16[1], r1
+; SOFTFPBE-NEXT: vrev64.32 d16, d16
+; SOFTFPBE-NEXT: vmov r1, r0, d16
+; SOFTFPBE-NEXT: bx lr
+entry:
+  %0 = bitcast <8 x half> %c to <8 x i16>
+  %shuffle = shufflevector <8 x i16> %0, <8 x i16> undef, <2 x i32> <i32 4, i32 5>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_quarter4(<8 x half> %c) {
+; HARD-LABEL: extract_quarter4:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vmov.u16 r0, d1[2]
+; HARD-NEXT: vmov.u16 r1, d1[3]
+; HARD-NEXT: vmov.32 d0[0], r0
+; HARD-NEXT: vmov.32 d0[1], r1
+; HARD-NEXT: bx lr
+;
+; HARDBE-LABEL: extract_quarter4:
+; HARDBE: @ %bb.0: @ %entry
+; HARDBE-NEXT: vrev64.16 q8, q0
+; HARDBE-NEXT: vmov.u16 r0, d17[2]
+; HARDBE-NEXT: vmov.u16 r1, d17[3]
+; HARDBE-NEXT: vmov.32 d16[0], r0
+; HARDBE-NEXT: vmov.32 d16[1], r1
+; HARDBE-NEXT: vrev64.32 d0, d16
+; HARDBE-NEXT: bx lr
+;
+; SOFT-LABEL: extract_quarter4:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d17, r2, r3
+; SOFT-NEXT: vmov.u16 r0, d17[2]
+; SOFT-NEXT: vmov.u16 r1, d17[3]
+; SOFT-NEXT: bx lr
+;
+; SOFTFPBE-LABEL: extract_quarter4:
+; SOFTFPBE: @ %bb.0: @ %entry
+; SOFTFPBE-NEXT: vmov d17, r3, r2
+; SOFTFPBE-NEXT: vrev64.16 q8, q8
+; SOFTFPBE-NEXT: vmov.u16 r0, d17[2]
+; SOFTFPBE-NEXT: vmov.u16 r1, d17[3]
+; SOFTFPBE-NEXT: vmov.32 d16[0], r0
+; SOFTFPBE-NEXT: vmov.32 d16[1], r1
+; SOFTFPBE-NEXT: vrev64.32 d16, d16
+; SOFTFPBE-NEXT: vmov r1, r0, d16
+; SOFTFPBE-NEXT: bx lr
+entry:
+  %0 = bitcast <8 x half> %c to <8 x i16>
+  %shuffle = shufflevector <8 x i16> %0, <8 x i16> undef, <2 x i32> <i32 6, i32 7>
+  ret <2 x i16> %shuffle
+}
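
Reviewer note on TODO 1: a minimal standalone sketch of the folding opportunity, distilled from bitcast_8xhalf_8xi16 above. The function name and the "after" assembly column are illustrative assumptions, not autogenerated checks.

; A vdup whose only use is a lane extract should fold to a single
; lane extract from the original register.
define i16 @splat_then_extract(<8 x half> %c) {
entry:
  ; Splat lane 7, then read lane 0 of the splat; the value is just lane 7 of %c.
  %splat = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %bc = bitcast <8 x half> %splat to <8 x i16>
  %lane = extractelement <8 x i16> %bc, i32 0
  ret i16 %lane
}
; Current hard-float lowering:     After TODO 1 (hypothetical):
;   vdup.16 q8, d1[3]                vmov.u16 r0, d1[3]
;   vmov.u16 r0, d16[0]              bx lr
;   bx lr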