Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1833,6 +1833,13 @@ def MVE_VMOV_from_lane_u8 : MVE_VMOV_lane_8 < "u8", 0b1, MVE_VMOV_from_lane>; def MVE_VMOV_to_lane_8 : MVE_VMOV_lane_8 < "8", 0b0, MVE_VMOV_to_lane>; +// This is the same as insertelt but allows the inserted value to be an i32 as +// will be used when it is the only legal type. +def ARMVecInsert : SDTypeProfile<1, 3, [ + SDTCisVT<2, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3> +]>; +def ARMinsertelt : SDNode<"ISD::INSERT_VECTOR_ELT", ARMVecInsert>; + let Predicates = [HasMVEInt] in { def : Pat<(extractelt (v2f64 MQPR:$src), imm:$lane), (f64 (EXTRACT_SUBREG MQPR:$src, (DSubReg_f64_reg imm:$lane)))>; @@ -1893,6 +1900,44 @@ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), (f16 HPR:$src), ssub_0)>; def : Pat<(v8f16 (scalar_to_vector GPR:$src)), (MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>; + + foreach LANE = [0, 2, 4, 6] in { + defvar SSUB = !cast<SubRegIndex>("ssub_"#!srl(LANE, 1)); + + // v8f16 pattern for inserting two lanes using a VINS + def : Pat<(insertelt (insertelt (v8f16 MQPR:$srcV), (f16 HPR:$src1), LANE), + (f16 HPR:$src2), !add(LANE,1)), + (COPY_TO_REGCLASS (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$srcV, MQPR)), + (VINSH (COPY_TO_REGCLASS HPR:$src1, SPR), + (COPY_TO_REGCLASS HPR:$src2, SPR)), + SSUB), MQPR)>; + + // v8i16 pattern for extracting 2 even lane elements and inserting them using a VINS + def : Pat<(ARMinsertelt (ARMinsertelt (v8i16 MQPR:$srcV), + (ARMvgetlaneu (v8i16 MQPR:$src1), imm_even:$lane1), + LANE), + (ARMvgetlaneu (v8i16 MQPR:$src2), imm_even:$lane2), + !add(LANE,1)), + (COPY_TO_REGCLASS (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$srcV, MQPR)), + (VINSH (EXTRACT_SUBREG (v8f16 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), + (SSubReg_f16_reg imm_even:$lane1)), + (EXTRACT_SUBREG (v8f16 (COPY_TO_REGCLASS MQPR:$src2, MQPR)), + (SSubReg_f16_reg
imm_even:$lane2))), + SSUB), MQPR)>; + + // v8i16 pattern for extracting an element using VMOVX and inserting another using a VINS + def : Pat<(ARMinsertelt (ARMinsertelt (v8i16 MQPR:$srcV), + (ARMvgetlaneu (v8i16 MQPR:$src1), imm_odd:$lane1), + LANE), + (ARMvgetlaneu (v8i16 MQPR:$src2), imm_even:$lane2), + !add(LANE,1)), + (COPY_TO_REGCLASS (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$srcV, MQPR)), + (VINSH (VMOVH (EXTRACT_SUBREG (v8f16 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), + (SSubReg_f16_reg imm_odd:$lane1))), + (EXTRACT_SUBREG (v8f16 (COPY_TO_REGCLASS MQPR:$src2, MQPR)), + (SSubReg_f16_reg imm_even:$lane2))), + SSUB), MQPR)>; + } } // end of mve_bit instructions Index: llvm/lib/Target/ARM/ARMInstrVFP.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrVFP.td +++ llvm/lib/Target/ARM/ARMInstrVFP.td @@ -798,6 +798,8 @@ Requires<[HasFP16]>, Sched<[WriteFPCVT]>; +// AddedComplexity to use over the dual-insert MVE pattern +let AddedComplexity = 6 in def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane), (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTTSH SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; @@ -1126,9 +1128,12 @@ Requires<[HasFullFP16]>; def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0, - (outs SPR:$Sd), (ins SPR:$Sm), + (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm), IIC_fpUNA16, "vins.f16\t$Sd, $Sm", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]> { + let Constraints = "$Sd = $Sda"; +} + } // PostEncoderMethod } // hasSideEffects Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -311,10 +311,10 @@ ; CHECK-LABEL: fast_float_half_mac: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; 
CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: beq.w .LBB2_22 +; CHECK-NEXT: beq.w .LBB2_20 ; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q5, #0x0 @@ -329,41 +329,27 @@ ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: vdup.32 q2, r12 -; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill -; CHECK-NEXT: b .LBB2_4 -; CHECK-NEXT: .LBB2_2: @ %cond.load25 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmovx.f16 s0, s28 -; CHECK-NEXT: vmov r4, s28 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q6[0], r4 -; CHECK-NEXT: vldr.16 s0, [r1, #6] -; CHECK-NEXT: vmov.16 q6[1], r2 -; CHECK-NEXT: vmov r2, s29 -; CHECK-NEXT: vmov.16 q6[2], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q6[3], r2 -; CHECK-NEXT: .LBB2_3: @ %else26 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmul.f16 q0, q6, q5 +; CHECK-NEXT: b .LBB2_3 +; CHECK-NEXT: .LBB2_2: @ %else26 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: vmul.f16 q5, q6, q5 ; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: vcvtt.f32.f16 s23, s1 +; CHECK-NEXT: vcvtt.f32.f16 s27, s21 ; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: vcvtb.f32.f16 s22, s1 +; CHECK-NEXT: vcvtb.f32.f16 s26, s21 ; CHECK-NEXT: adds r3, #4 -; CHECK-NEXT: vcvtt.f32.f16 s21, s0 -; CHECK-NEXT: vcvtb.f32.f16 s20, s0 -; CHECK-NEXT: vadd.f32 q5, q3, q5 +; CHECK-NEXT: vcvtt.f32.f16 s25, s20 +; CHECK-NEXT: vcvtb.f32.f16 s24, s20 +; CHECK-NEXT: vadd.f32 q5, q3, q6 ; CHECK-NEXT: subs.w lr, lr, #1 -; CHECK-NEXT: bne .LBB2_4 -; CHECK-NEXT: b .LBB2_21 -; CHECK-NEXT: .LBB2_4: @ %vector.body +; CHECK-NEXT: bne .LBB2_3 +; CHECK-NEXT: b .LBB2_19 +; CHECK-NEXT: .LBB2_3: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov q3, q5 -; CHECK-NEXT: @ implicit-def: $q6 ; CHECK-NEXT: vadd.i32 q4, q0, r3 +; CHECK-NEXT: 
vmov q3, q5 ; CHECK-NEXT: vcmp.u32 cs, q1, q4 +; CHECK-NEXT: @ implicit-def: $q5 ; CHECK-NEXT: vmrs r4, p0 ; CHECK-NEXT: and r2, r4, #1 ; CHECK-NEXT: rsbs r5, r2, #0 @@ -379,72 +365,29 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: bfi r2, r4, #3, #1 ; CHECK-NEXT: lsls r4, r2, #31 -; CHECK-NEXT: bne .LBB2_9 -; CHECK-NEXT: @ %bb.5: @ %else -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bne .LBB2_12 +; CHECK-NEXT: @ %bb.4: @ %else +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 ; CHECK-NEXT: lsls r4, r2, #30 -; CHECK-NEXT: bpl .LBB2_10 -; CHECK-NEXT: .LBB2_6: @ %cond.load6 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vldr.16 s20, [r0, #2] -; CHECK-NEXT: vmov r5, s24 -; CHECK-NEXT: vmovx.f16 s24, s25 -; CHECK-NEXT: vmov r4, s20 -; CHECK-NEXT: vmov.16 q5[0], r5 -; CHECK-NEXT: vmov.16 q5[1], r4 -; CHECK-NEXT: vmov r4, s25 -; CHECK-NEXT: vmov.16 q5[2], r4 -; CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: vmov.16 q5[3], r4 +; CHECK-NEXT: bmi .LBB2_13 +; CHECK-NEXT: .LBB2_5: @ %else7 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 ; CHECK-NEXT: lsls r4, r2, #29 -; CHECK-NEXT: bmi .LBB2_11 -; CHECK-NEXT: .LBB2_7: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmov q6, q5 -; CHECK-NEXT: lsls r2, r2, #28 -; CHECK-NEXT: bmi .LBB2_12 -; CHECK-NEXT: .LBB2_8: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmov q5, q6 -; CHECK-NEXT: b .LBB2_13 -; CHECK-NEXT: .LBB2_9: @ %cond.load -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vldr.16 s24, [r0] -; CHECK-NEXT: lsls r4, r2, #30 -; CHECK-NEXT: bmi .LBB2_6 -; CHECK-NEXT: .LBB2_10: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmov q5, q6 -; CHECK-NEXT: lsls r4, r2, #29 -; CHECK-NEXT: bpl .LBB2_7 -; CHECK-NEXT: .LBB2_11: @ %cond.load9 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmovx.f16 s24, s20 -; CHECK-NEXT: vmov r4, s20 -; CHECK-NEXT: vldr.16 s28, [r0, #4] -; CHECK-NEXT: vmov r5, s24 -; CHECK-NEXT: vmov.16 q6[0], r4 -; CHECK-NEXT: vmovx.f16 s20, s21 -; CHECK-NEXT: vmov.16 
q6[1], r5 -; CHECK-NEXT: vmov r4, s28 -; CHECK-NEXT: vmov.16 q6[2], r4 -; CHECK-NEXT: vmov r4, s20 -; CHECK-NEXT: vmov.16 q6[3], r4 +; CHECK-NEXT: bmi .LBB2_14 +; CHECK-NEXT: .LBB2_6: @ %else10 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 ; CHECK-NEXT: lsls r2, r2, #28 ; CHECK-NEXT: bpl .LBB2_8 -; CHECK-NEXT: .LBB2_12: @ %cond.load12 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmovx.f16 s20, s24 -; CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmov.16 q5[0], r4 -; CHECK-NEXT: vmov.16 q5[1], r2 -; CHECK-NEXT: vmov r2, s25 +; CHECK-NEXT: .LBB2_7: @ %cond.load12 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: vmovx.f16 s24, s20 +; CHECK-NEXT: vins.f16 s20, s24 ; CHECK-NEXT: vldr.16 s24, [r0, #6] -; CHECK-NEXT: vmov.16 q5[2], r2 -; CHECK-NEXT: vmov r2, s24 -; CHECK-NEXT: vmov.16 q5[3], r2 -; CHECK-NEXT: .LBB2_13: @ %else13 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: vins.f16 s21, s24 +; CHECK-NEXT: .LBB2_8: @ %else13 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 ; CHECK-NEXT: vcmp.u32 cs, q2, q4 -; CHECK-NEXT: @ implicit-def: $q7 +; CHECK-NEXT: @ implicit-def: $q6 ; CHECK-NEXT: vmrs r4, p0 ; CHECK-NEXT: and r2, r4, #1 ; CHECK-NEXT: rsbs r5, r2, #0 @@ -460,58 +403,73 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: bfi r2, r4, #3, #1 ; CHECK-NEXT: lsls r4, r2, #31 -; CHECK-NEXT: bne .LBB2_17 -; CHECK-NEXT: @ %bb.14: @ %else17 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bne .LBB2_15 +; CHECK-NEXT: @ %bb.9: @ %else17 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: lsls r4, r2, #30 +; CHECK-NEXT: bmi .LBB2_16 +; CHECK-NEXT: .LBB2_10: @ %else20 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: lsls r4, r2, #29 +; CHECK-NEXT: bmi .LBB2_17 +; CHECK-NEXT: .LBB2_11: @ %else23 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: lsls r2, r2, #28 +; CHECK-NEXT: bpl .LBB2_2 +; CHECK-NEXT: b .LBB2_18 +; CHECK-NEXT: .LBB2_12: @ %cond.load +; CHECK-NEXT: @ in 
Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: vldr.16 s20, [r0] ; CHECK-NEXT: lsls r4, r2, #30 -; CHECK-NEXT: bpl .LBB2_18 -; CHECK-NEXT: .LBB2_15: @ %cond.load19 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vldr.16 s24, [r1, #2] -; CHECK-NEXT: vmov r5, s28 -; CHECK-NEXT: vmovx.f16 s28, s29 -; CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: vmov.16 q6[0], r5 -; CHECK-NEXT: vmov.16 q6[1], r4 -; CHECK-NEXT: vmov r4, s29 -; CHECK-NEXT: vmov.16 q6[2], r4 -; CHECK-NEXT: vmov r4, s28 -; CHECK-NEXT: vmov.16 q6[3], r4 +; CHECK-NEXT: bpl .LBB2_5 +; CHECK-NEXT: .LBB2_13: @ %cond.load6 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: vldr.16 s24, [r0, #2] +; CHECK-NEXT: vins.f16 s20, s24 +; CHECK-NEXT: vmovx.f16 s24, s21 +; CHECK-NEXT: vins.f16 s21, s24 ; CHECK-NEXT: lsls r4, r2, #29 -; CHECK-NEXT: bmi .LBB2_19 -; CHECK-NEXT: .LBB2_16: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmov q7, q6 +; CHECK-NEXT: bpl .LBB2_6 +; CHECK-NEXT: .LBB2_14: @ %cond.load9 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: vmovx.f16 s24, s20 +; CHECK-NEXT: vins.f16 s20, s24 +; CHECK-NEXT: vmovx.f16 s24, s21 +; CHECK-NEXT: vldr.16 s21, [r0, #4] +; CHECK-NEXT: vins.f16 s21, s24 ; CHECK-NEXT: lsls r2, r2, #28 -; CHECK-NEXT: bmi.w .LBB2_2 -; CHECK-NEXT: b .LBB2_20 -; CHECK-NEXT: .LBB2_17: @ %cond.load16 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vldr.16 s28, [r1] +; CHECK-NEXT: bmi .LBB2_7 +; CHECK-NEXT: b .LBB2_8 +; CHECK-NEXT: .LBB2_15: @ %cond.load16 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: vldr.16 s24, [r1] ; CHECK-NEXT: lsls r4, r2, #30 -; CHECK-NEXT: bmi .LBB2_15 -; CHECK-NEXT: .LBB2_18: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmov q6, q7 +; CHECK-NEXT: bpl .LBB2_10 +; CHECK-NEXT: .LBB2_16: @ %cond.load19 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: vldr.16 s28, [r1, #2] +; CHECK-NEXT: vins.f16 s24, s28 +; CHECK-NEXT: vmovx.f16 s28, s25 +; CHECK-NEXT: vins.f16 s25, s28 ; CHECK-NEXT: lsls r4, r2, 
#29 -; CHECK-NEXT: bpl .LBB2_16 -; CHECK-NEXT: .LBB2_19: @ %cond.load22 -; CHECK-NEXT: @ in Loop: Header=BB2_4 Depth=1 +; CHECK-NEXT: bpl .LBB2_11 +; CHECK-NEXT: .LBB2_17: @ %cond.load22 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 ; CHECK-NEXT: vmovx.f16 s28, s24 -; CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: vldr.16 s0, [r1, #4] -; CHECK-NEXT: vmov r5, s28 -; CHECK-NEXT: vmov.16 q7[0], r4 -; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: vmov.16 q7[1], r5 -; CHECK-NEXT: vmovx.f16 s0, s25 -; CHECK-NEXT: vmov.16 q7[2], r4 -; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: vmov.16 q7[3], r4 +; CHECK-NEXT: vins.f16 s24, s28 +; CHECK-NEXT: vmovx.f16 s28, s25 +; CHECK-NEXT: vldr.16 s25, [r1, #4] +; CHECK-NEXT: vins.f16 s25, s28 ; CHECK-NEXT: lsls r2, r2, #28 -; CHECK-NEXT: bmi.w .LBB2_2 -; CHECK-NEXT: .LBB2_20: @ in Loop: Header=BB2_4 Depth=1 -; CHECK-NEXT: vmov q6, q7 -; CHECK-NEXT: b .LBB2_3 -; CHECK-NEXT: .LBB2_21: @ %middle.block +; CHECK-NEXT: bpl.w .LBB2_2 +; CHECK-NEXT: .LBB2_18: @ %cond.load25 +; CHECK-NEXT: @ in Loop: Header=BB2_3 Depth=1 +; CHECK-NEXT: vmovx.f16 s28, s24 +; CHECK-NEXT: vins.f16 s24, s28 +; CHECK-NEXT: vldr.16 s28, [r1, #6] +; CHECK-NEXT: vins.f16 s25, s28 +; CHECK-NEXT: b .LBB2_2 +; CHECK-NEXT: .LBB2_19: @ %middle.block ; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: vcmp.u32 cs, q0, q4 ; CHECK-NEXT: vpsel q0, q5, q3 @@ -520,16 +478,16 @@ ; CHECK-NEXT: vadd.f32 q0, q0, q1 ; CHECK-NEXT: vmov r0, s1 ; CHECK-NEXT: vadd.f32 q0, q0, r0 -; CHECK-NEXT: b .LBB2_23 -; CHECK-NEXT: .LBB2_22: +; CHECK-NEXT: b .LBB2_21 +; CHECK-NEXT: .LBB2_20: ; CHECK-NEXT: vldr s0, .LCPI2_0 -; CHECK-NEXT: .LBB2_23: @ %for.cond.cleanup +; CHECK-NEXT: .LBB2_21: @ %for.cond.cleanup ; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: add sp, #32 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.24: +; CHECK-NEXT: @ 
%bb.22: ; CHECK-NEXT: .LCPI2_1: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 Index: llvm/test/CodeGen/Thumb2/mve-div-expand.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-div-expand.ll +++ llvm/test/CodeGen/Thumb2/mve-div-expand.ll @@ -806,38 +806,26 @@ define arm_aapcs_vfpcc <8 x half> @fdiv_f16(<8 x half> %in1, <8 x half> %in2) { ; CHECK-LABEL: fdiv_f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vdiv.f16 s8, s0, s4 -; CHECK-NEXT: vmovx.f16 s10, s0 -; CHECK-NEXT: vmov r0, s8 ; CHECK-NEXT: vmovx.f16 s8, s4 -; CHECK-NEXT: vdiv.f16 s8, s10, s8 -; CHECK-NEXT: vdiv.f16 s12, s1, s5 -; CHECK-NEXT: vmov r1, s8 -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vmovx.f16 s10, s0 +; CHECK-NEXT: vdiv.f16 s12, s10, s8 +; CHECK-NEXT: vdiv.f16 s8, s0, s4 +; CHECK-NEXT: vins.f16 s8, s12 ; CHECK-NEXT: vmovx.f16 s12, s5 ; CHECK-NEXT: vmovx.f16 s14, s1 -; CHECK-NEXT: vmov.16 q2[1], r1 +; CHECK-NEXT: vdiv.f16 s9, s1, s5 ; CHECK-NEXT: vdiv.f16 s12, s14, s12 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vdiv.f16 s12, s2, s6 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s6 ; CHECK-NEXT: vmovx.f16 s14, s2 +; CHECK-NEXT: vins.f16 s9, s12 +; CHECK-NEXT: vmovx.f16 s12, s6 ; CHECK-NEXT: vdiv.f16 s12, s14, s12 -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vdiv.f16 s12, s3, s7 -; CHECK-NEXT: vmovx.f16 s4, s7 -; CHECK-NEXT: vmovx.f16 s0, s3 -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vdiv.f16 s0, s0, s4 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vdiv.f16 s10, s2, s6 +; CHECK-NEXT: vins.f16 s10, s12 +; CHECK-NEXT: vmovx.f16 s12, s7 +; CHECK-NEXT: vmovx.f16 s14, s3 +; CHECK-NEXT: vdiv.f16 s11, s3, s7 +; CHECK-NEXT: vdiv.f16 s12, s14, s12 +; CHECK-NEXT: vins.f16 s11, s12 ; CHECK-NEXT: vmov q0, q2 ; 
CHECK-NEXT: bx lr entry: Index: llvm/test/CodeGen/Thumb2/mve-fmas.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fmas.ll +++ llvm/test/CodeGen/Thumb2/mve-fmas.ll @@ -17,49 +17,30 @@ ; ; CHECK-MVE-LABEL: vfma16_v1: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: .vsave {d8, d9, d10} -; CHECK-MVE-NEXT: vpush {d8, d9, d10} ; CHECK-MVE-NEXT: vmovx.f16 s13, s0 -; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8 ; CHECK-MVE-NEXT: vmovx.f16 s12, s8 ; CHECK-MVE-NEXT: vmovx.f16 s14, s4 -; CHECK-MVE-NEXT: vmov.f32 s16, s1 -; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 -; CHECK-MVE-NEXT: vmov r1, s0 -; CHECK-MVE-NEXT: vmla.f16 s16, s5, s9 -; CHECK-MVE-NEXT: vmov r0, s13 -; CHECK-MVE-NEXT: vmov.16 q3[0], r1 -; CHECK-MVE-NEXT: vmov.16 q3[1], r0 -; CHECK-MVE-NEXT: vmov r0, s16 -; CHECK-MVE-NEXT: vmovx.f16 s16, s9 -; CHECK-MVE-NEXT: vmovx.f16 s18, s5 -; CHECK-MVE-NEXT: vmovx.f16 s20, s1 -; CHECK-MVE-NEXT: vmov.16 q3[2], r0 -; CHECK-MVE-NEXT: vmla.f16 s20, s18, s16 -; CHECK-MVE-NEXT: vmov.f32 s16, s2 -; CHECK-MVE-NEXT: vmov r0, s20 -; CHECK-MVE-NEXT: vmla.f16 s16, s6, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r0 -; CHECK-MVE-NEXT: vmov r0, s16 -; CHECK-MVE-NEXT: vmovx.f16 s16, s10 -; CHECK-MVE-NEXT: vmovx.f16 s18, s6 -; CHECK-MVE-NEXT: vmovx.f16 s20, s2 -; CHECK-MVE-NEXT: vmov.16 q3[4], r0 -; CHECK-MVE-NEXT: vmla.f16 s20, s18, s16 -; CHECK-MVE-NEXT: vmov.f32 s16, s3 -; CHECK-MVE-NEXT: vmov r0, s20 -; CHECK-MVE-NEXT: vmla.f16 s16, s7, s11 -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[5], r0 -; CHECK-MVE-NEXT: vmov r0, s16 ; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8 -; CHECK-MVE-NEXT: vmov.16 q3[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 -; CHECK-MVE-NEXT: vmov q0, q3 -; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s9 +; CHECK-MVE-NEXT: vins.f16 s0, s13 +; 
CHECK-MVE-NEXT: vmovx.f16 s13, s1 +; CHECK-MVE-NEXT: vmovx.f16 s14, s5 +; CHECK-MVE-NEXT: vmla.f16 s1, s5, s9 +; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s10 +; CHECK-MVE-NEXT: vins.f16 s1, s13 +; CHECK-MVE-NEXT: vmovx.f16 s13, s2 +; CHECK-MVE-NEXT: vmovx.f16 s14, s6 +; CHECK-MVE-NEXT: vmla.f16 s2, s6, s10 +; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s11 +; CHECK-MVE-NEXT: vins.f16 s2, s13 +; CHECK-MVE-NEXT: vmovx.f16 s13, s3 +; CHECK-MVE-NEXT: vmovx.f16 s14, s7 +; CHECK-MVE-NEXT: vmla.f16 s3, s7, s11 +; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vins.f16 s3, s13 ; CHECK-MVE-NEXT: bx lr entry: %0 = fmul <8 x half> %src2, %src3 @@ -81,49 +62,30 @@ ; ; CHECK-MVE-LABEL: vfma16_v2: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: .vsave {d8, d9, d10} -; CHECK-MVE-NEXT: vpush {d8, d9, d10} ; CHECK-MVE-NEXT: vmovx.f16 s13, s0 -; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8 ; CHECK-MVE-NEXT: vmovx.f16 s12, s8 ; CHECK-MVE-NEXT: vmovx.f16 s14, s4 -; CHECK-MVE-NEXT: vmov.f32 s16, s1 -; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 -; CHECK-MVE-NEXT: vmov r1, s0 -; CHECK-MVE-NEXT: vmla.f16 s16, s5, s9 -; CHECK-MVE-NEXT: vmov r0, s13 -; CHECK-MVE-NEXT: vmov.16 q3[0], r1 -; CHECK-MVE-NEXT: vmov.16 q3[1], r0 -; CHECK-MVE-NEXT: vmov r0, s16 -; CHECK-MVE-NEXT: vmovx.f16 s16, s9 -; CHECK-MVE-NEXT: vmovx.f16 s18, s5 -; CHECK-MVE-NEXT: vmovx.f16 s20, s1 -; CHECK-MVE-NEXT: vmov.16 q3[2], r0 -; CHECK-MVE-NEXT: vmla.f16 s20, s18, s16 -; CHECK-MVE-NEXT: vmov.f32 s16, s2 -; CHECK-MVE-NEXT: vmov r0, s20 -; CHECK-MVE-NEXT: vmla.f16 s16, s6, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r0 -; CHECK-MVE-NEXT: vmov r0, s16 -; CHECK-MVE-NEXT: vmovx.f16 s16, s10 -; CHECK-MVE-NEXT: vmovx.f16 s18, s6 -; CHECK-MVE-NEXT: vmovx.f16 s20, s2 -; CHECK-MVE-NEXT: vmov.16 q3[4], r0 -; CHECK-MVE-NEXT: vmla.f16 s20, s18, s16 -; CHECK-MVE-NEXT: vmov.f32 s16, s3 -; CHECK-MVE-NEXT: vmov r0, s20 -; CHECK-MVE-NEXT: vmla.f16 s16, s7, s11 -; CHECK-MVE-NEXT: 
vmovx.f16 s8, s11 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[5], r0 -; CHECK-MVE-NEXT: vmov r0, s16 ; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8 -; CHECK-MVE-NEXT: vmov.16 q3[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 -; CHECK-MVE-NEXT: vmov q0, q3 -; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s9 +; CHECK-MVE-NEXT: vins.f16 s0, s13 +; CHECK-MVE-NEXT: vmovx.f16 s13, s1 +; CHECK-MVE-NEXT: vmovx.f16 s14, s5 +; CHECK-MVE-NEXT: vmla.f16 s1, s5, s9 +; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s10 +; CHECK-MVE-NEXT: vins.f16 s1, s13 +; CHECK-MVE-NEXT: vmovx.f16 s13, s2 +; CHECK-MVE-NEXT: vmovx.f16 s14, s6 +; CHECK-MVE-NEXT: vmla.f16 s2, s6, s10 +; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s11 +; CHECK-MVE-NEXT: vins.f16 s2, s13 +; CHECK-MVE-NEXT: vmovx.f16 s13, s3 +; CHECK-MVE-NEXT: vmovx.f16 s14, s7 +; CHECK-MVE-NEXT: vmla.f16 s3, s7, s11 +; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vins.f16 s3, s13 ; CHECK-MVE-NEXT: bx lr entry: %0 = fmul <8 x half> %src2, %src3 @@ -145,49 +107,30 @@ ; ; CHECK-MVE-LABEL: vfms16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: .vsave {d8, d9, d10} -; CHECK-MVE-NEXT: vpush {d8, d9, d10} ; CHECK-MVE-NEXT: vmovx.f16 s13, s0 -; CHECK-MVE-NEXT: vmls.f16 s0, s4, s8 ; CHECK-MVE-NEXT: vmovx.f16 s12, s8 ; CHECK-MVE-NEXT: vmovx.f16 s14, s4 -; CHECK-MVE-NEXT: vmov.f32 s16, s1 -; CHECK-MVE-NEXT: vmls.f16 s13, s14, s12 -; CHECK-MVE-NEXT: vmov r1, s0 -; CHECK-MVE-NEXT: vmls.f16 s16, s5, s9 -; CHECK-MVE-NEXT: vmov r0, s13 -; CHECK-MVE-NEXT: vmov.16 q3[0], r1 -; CHECK-MVE-NEXT: vmov.16 q3[1], r0 -; CHECK-MVE-NEXT: vmov r0, s16 -; CHECK-MVE-NEXT: vmovx.f16 s16, s9 -; CHECK-MVE-NEXT: vmovx.f16 s18, s5 -; CHECK-MVE-NEXT: vmovx.f16 s20, s1 -; CHECK-MVE-NEXT: vmov.16 q3[2], r0 -; CHECK-MVE-NEXT: vmls.f16 s20, s18, s16 -; 
CHECK-MVE-NEXT: vmov.f32 s16, s2 -; CHECK-MVE-NEXT: vmov r0, s20 -; CHECK-MVE-NEXT: vmls.f16 s16, s6, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r0 -; CHECK-MVE-NEXT: vmov r0, s16 -; CHECK-MVE-NEXT: vmovx.f16 s16, s10 -; CHECK-MVE-NEXT: vmovx.f16 s18, s6 -; CHECK-MVE-NEXT: vmovx.f16 s20, s2 -; CHECK-MVE-NEXT: vmov.16 q3[4], r0 -; CHECK-MVE-NEXT: vmls.f16 s20, s18, s16 -; CHECK-MVE-NEXT: vmov.f32 s16, s3 -; CHECK-MVE-NEXT: vmov r0, s20 -; CHECK-MVE-NEXT: vmls.f16 s16, s7, s11 -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[5], r0 -; CHECK-MVE-NEXT: vmov r0, s16 ; CHECK-MVE-NEXT: vmls.f16 s0, s4, s8 -; CHECK-MVE-NEXT: vmov.16 q3[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 -; CHECK-MVE-NEXT: vmov q0, q3 -; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: vmls.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s9 +; CHECK-MVE-NEXT: vins.f16 s0, s13 +; CHECK-MVE-NEXT: vmovx.f16 s13, s1 +; CHECK-MVE-NEXT: vmovx.f16 s14, s5 +; CHECK-MVE-NEXT: vmls.f16 s1, s5, s9 +; CHECK-MVE-NEXT: vmls.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s10 +; CHECK-MVE-NEXT: vins.f16 s1, s13 +; CHECK-MVE-NEXT: vmovx.f16 s13, s2 +; CHECK-MVE-NEXT: vmovx.f16 s14, s6 +; CHECK-MVE-NEXT: vmls.f16 s2, s6, s10 +; CHECK-MVE-NEXT: vmls.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s11 +; CHECK-MVE-NEXT: vins.f16 s2, s13 +; CHECK-MVE-NEXT: vmovx.f16 s13, s3 +; CHECK-MVE-NEXT: vmovx.f16 s14, s7 +; CHECK-MVE-NEXT: vmls.f16 s3, s7, s11 +; CHECK-MVE-NEXT: vmls.f16 s13, s14, s12 +; CHECK-MVE-NEXT: vins.f16 s3, s13 ; CHECK-MVE-NEXT: bx lr entry: %0 = fmul <8 x half> %src2, %src3 @@ -213,44 +156,27 @@ ; ; CHECK-MVE-LABEL: vfmar16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcvtb.f16.f32 s12, s8 -; CHECK-MVE-NEXT: vmov.f32 s8, s0 -; CHECK-MVE-NEXT: vmla.f16 s8, s4, s12 -; CHECK-MVE-NEXT: vmov.f32 s14, s1 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s4 
-; CHECK-MVE-NEXT: vmovx.f16 s10, s0 -; CHECK-MVE-NEXT: vmla.f16 s14, s5, s12 -; CHECK-MVE-NEXT: vmla.f16 s10, s8, s12 -; CHECK-MVE-NEXT: vmovx.f16 s13, s1 -; CHECK-MVE-NEXT: vmov r1, s10 -; CHECK-MVE-NEXT: vmov.16 q2[0], r0 -; CHECK-MVE-NEXT: vmov r0, s14 -; CHECK-MVE-NEXT: vmovx.f16 s14, s5 -; CHECK-MVE-NEXT: vmov.16 q2[1], r1 -; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 -; CHECK-MVE-NEXT: vmov.f32 s14, s2 -; CHECK-MVE-NEXT: vmov.16 q2[2], r0 -; CHECK-MVE-NEXT: vmov r0, s13 -; CHECK-MVE-NEXT: vmla.f16 s14, s6, s12 -; CHECK-MVE-NEXT: vmov.16 q2[3], r0 -; CHECK-MVE-NEXT: vmov r0, s14 -; CHECK-MVE-NEXT: vmovx.f16 s14, s6 -; CHECK-MVE-NEXT: vmovx.f16 s13, s2 -; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12 -; CHECK-MVE-NEXT: vmov.f32 s14, s3 -; CHECK-MVE-NEXT: vmov.16 q2[4], r0 -; CHECK-MVE-NEXT: vmov r0, s13 -; CHECK-MVE-NEXT: vmla.f16 s14, s7, s12 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q2[5], r0 -; CHECK-MVE-NEXT: vmov r0, s14 -; CHECK-MVE-NEXT: vmla.f16 s0, s4, s12 -; CHECK-MVE-NEXT: vmov.16 q2[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q2[7], r0 -; CHECK-MVE-NEXT: vmov q0, q2 +; CHECK-MVE-NEXT: vcvtb.f16.f32 s8, s8 +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: vmovx.f16 s10, s4 +; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8 +; CHECK-MVE-NEXT: vmla.f16 s12, s10, s8 +; CHECK-MVE-NEXT: vmovx.f16 s10, s5 +; CHECK-MVE-NEXT: vins.f16 s0, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s1 +; CHECK-MVE-NEXT: vmla.f16 s12, s10, s8 +; CHECK-MVE-NEXT: vmla.f16 s1, s5, s8 +; CHECK-MVE-NEXT: vins.f16 s1, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s2 +; CHECK-MVE-NEXT: vmovx.f16 s10, s6 +; CHECK-MVE-NEXT: vmla.f16 s2, s6, s8 +; CHECK-MVE-NEXT: vmla.f16 s12, s10, s8 +; CHECK-MVE-NEXT: vmovx.f16 s10, s7 +; CHECK-MVE-NEXT: vins.f16 s2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s3 +; CHECK-MVE-NEXT: vmla.f16 s12, s10, s8 +; CHECK-MVE-NEXT: vmla.f16 s3, s7, s8 +; CHECK-MVE-NEXT: vins.f16 s3, s12 ; CHECK-MVE-NEXT: bx lr entry: 
%src3 = fptrunc float %src3o to half @@ -279,47 +205,35 @@ ; ; CHECK-MVE-LABEL: vfma16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcvtb.f16.f32 s12, s8 -; CHECK-MVE-NEXT: vmovx.f16 s10, s0 -; CHECK-MVE-NEXT: vmov.f32 s8, s12 -; CHECK-MVE-NEXT: vmovx.f16 s13, s1 -; CHECK-MVE-NEXT: vmla.f16 s8, s0, s4 -; CHECK-MVE-NEXT: vmov.f32 s14, s12 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s4 -; CHECK-MVE-NEXT: vmla.f16 s14, s10, s8 -; CHECK-MVE-NEXT: vmov.16 q2[0], r0 -; CHECK-MVE-NEXT: vmov r1, s14 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.f32 s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[1], r1 -; CHECK-MVE-NEXT: vmla.f16 s14, s1, s5 -; CHECK-MVE-NEXT: vmov.f32 s15, s12 -; CHECK-MVE-NEXT: vmov r0, s14 -; CHECK-MVE-NEXT: vmovx.f16 s14, s5 -; CHECK-MVE-NEXT: vmla.f16 s15, s13, s14 -; CHECK-MVE-NEXT: vmov.f32 s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[2], r0 -; CHECK-MVE-NEXT: vmov r0, s15 -; CHECK-MVE-NEXT: vmla.f16 s14, s2, s6 -; CHECK-MVE-NEXT: vmov.16 q2[3], r0 -; CHECK-MVE-NEXT: vmov r0, s14 -; CHECK-MVE-NEXT: vmovx.f16 s14, s6 -; CHECK-MVE-NEXT: vmovx.f16 s13, s2 -; CHECK-MVE-NEXT: vmov.f32 s15, s12 -; CHECK-MVE-NEXT: vmla.f16 s15, s13, s14 -; CHECK-MVE-NEXT: vmov.f32 s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[4], r0 -; CHECK-MVE-NEXT: vmov r0, s15 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmla.f16 s14, s3, s7 -; CHECK-MVE-NEXT: vmov.16 q2[5], r0 -; CHECK-MVE-NEXT: vmov r0, s14 -; CHECK-MVE-NEXT: vmla.f16 s12, s0, s4 -; CHECK-MVE-NEXT: vmov.16 q2[6], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmov.16 q2[7], r0 -; CHECK-MVE-NEXT: vmov q0, q2 +; CHECK-MVE-NEXT: vmov q3, q0 +; CHECK-MVE-NEXT: vcvtb.f16.f32 s3, s8 +; CHECK-MVE-NEXT: vmov.f32 s8, s3 +; CHECK-MVE-NEXT: vmovx.f16 s10, s4 +; CHECK-MVE-NEXT: vmov.f32 s0, s3 +; CHECK-MVE-NEXT: vmovx.f16 s9, s12 +; CHECK-MVE-NEXT: vmla.f16 s8, s9, s10 +; CHECK-MVE-NEXT: vmla.f16 s0, s12, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s8 +; CHECK-MVE-NEXT: vmov.f32 s9, s3 +; 
CHECK-MVE-NEXT: vmov.f32 s1, s3 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 +; CHECK-MVE-NEXT: vmovx.f16 s10, s13 +; CHECK-MVE-NEXT: vmla.f16 s1, s13, s5 +; CHECK-MVE-NEXT: vmla.f16 s9, s10, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 +; CHECK-MVE-NEXT: vins.f16 s1, s9 +; CHECK-MVE-NEXT: vmov.f32 s9, s3 +; CHECK-MVE-NEXT: vmov.f32 s2, s3 +; CHECK-MVE-NEXT: vmovx.f16 s10, s14 +; CHECK-MVE-NEXT: vmla.f16 s9, s10, s8 +; CHECK-MVE-NEXT: vmla.f16 s2, s14, s6 +; CHECK-MVE-NEXT: vins.f16 s2, s9 +; CHECK-MVE-NEXT: vmov.f32 s9, s3 +; CHECK-MVE-NEXT: vmovx.f16 s8, s7 +; CHECK-MVE-NEXT: vmovx.f16 s10, s15 +; CHECK-MVE-NEXT: vmla.f16 s9, s10, s8 +; CHECK-MVE-NEXT: vmla.f16 s3, s15, s7 +; CHECK-MVE-NEXT: vins.f16 s3, s9 ; CHECK-MVE-NEXT: bx lr entry: %src3 = fptrunc float %src3o to half @@ -489,50 +403,33 @@ ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s4 ; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s14, #0 +; CHECK-MVE-NEXT: vmovx.f16 s13, s0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vmov.f32 s12, s0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s14, s4 -; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8 -; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s14, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s0, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmovx.f16 s13, s0 -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmovx.f16 s12, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmov.f32 s15, s13 -; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12 -; CHECK-MVE-NEXT: vcmp.f16 s5, #0 -; CHECK-MVE-NEXT: 
vseleq.f16 s12, s13, s15 +; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s13, s15 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 -; CHECK-MVE-NEXT: vmov.16 q3[0], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 -; CHECK-MVE-NEXT: vmov.f32 s16, s1 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmla.f16 s16, s5, s9 -; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmov.f32 s12, s0 ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 -; CHECK-MVE-NEXT: vseleq.f16 s16, s1, s16 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8 ; CHECK-MVE-NEXT: vcmp.f16 s18, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmov.16 q3[1], r2 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vseleq.f16 s12, s0, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vmovx.f16 s20, s1 @@ -540,71 +437,76 @@ ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s9 ; CHECK-MVE-NEXT: vmov.f32 s22, s20 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmla.f16 s22, s18, s16 -; CHECK-MVE-NEXT: vcmp.f16 s6, #0 +; CHECK-MVE-NEXT: vcmp.f16 s5, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s20, s22 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s6 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s16, s2 -; CHECK-MVE-NEXT: vmla.f16 s16, s6, s10 +; CHECK-MVE-NEXT: vmov.f32 s18, s1 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s2, s16 +; CHECK-MVE-NEXT: vmla.f16 s18, s5, s9 +; CHECK-MVE-NEXT: vseleq.f16 s13, s1, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: vcmp.f16 s18, #0 
-; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: vmovx.f16 s20, s2 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s10 ; CHECK-MVE-NEXT: vmov.f32 s22, s20 -; CHECK-MVE-NEXT: vmla.f16 s22, s18, s16 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmla.f16 s22, s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s6, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s20, s22 -; CHECK-MVE-NEXT: vcmp.f16 s7, #0 -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s16, s3 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmla.f16 s16, s7, s11 +; CHECK-MVE-NEXT: vmov.f32 s18, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vseleq.f16 s16, s3, s16 +; CHECK-MVE-NEXT: vmla.f16 s18, s6, s10 +; CHECK-MVE-NEXT: vseleq.f16 s14, s2, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s7 +; CHECK-MVE-NEXT: vcmp.f16 s18, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s20, s3 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s11 +; CHECK-MVE-NEXT: vmov.f32 s22, s20 +; CHECK-MVE-NEXT: vmla.f16 s22, s18, s16 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s7, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s20, s22 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; 
CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 -; CHECK-MVE-NEXT: vmov.f32 s2, s0 +; CHECK-MVE-NEXT: vmov.f32 s18, s3 +; CHECK-MVE-NEXT: vmla.f16 s18, s7, s11 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmla.f16 s2, s4, s8 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s2 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s3, s18 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -634,50 +536,33 @@ ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s4 ; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s14, #0 +; CHECK-MVE-NEXT: vmovx.f16 s13, s0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vmov.f32 s12, s0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s14, s4 -; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8 -; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s14, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s0, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmovx.f16 s13, s0 -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmovx.f16 s12, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmov.f32 s15, s13 -; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12 -; CHECK-MVE-NEXT: vcmp.f16 s5, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15 +; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s13, s15 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, 
fpscr -; CHECK-MVE-NEXT: vmov r2, s12 -; CHECK-MVE-NEXT: vmov.16 q3[0], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 -; CHECK-MVE-NEXT: vmov.f32 s16, s1 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmla.f16 s16, s5, s9 -; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmov.f32 s12, s0 ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 -; CHECK-MVE-NEXT: vseleq.f16 s16, s1, s16 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8 ; CHECK-MVE-NEXT: vcmp.f16 s18, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmov.16 q3[1], r2 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vseleq.f16 s12, s0, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vmovx.f16 s20, s1 @@ -685,71 +570,76 @@ ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s9 ; CHECK-MVE-NEXT: vmov.f32 s22, s20 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmla.f16 s22, s18, s16 -; CHECK-MVE-NEXT: vcmp.f16 s6, #0 +; CHECK-MVE-NEXT: vcmp.f16 s5, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s20, s22 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s6 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s16, s2 -; CHECK-MVE-NEXT: vmla.f16 s16, s6, s10 +; CHECK-MVE-NEXT: vmov.f32 s18, s1 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s2, s16 +; CHECK-MVE-NEXT: vmla.f16 s18, s5, s9 +; CHECK-MVE-NEXT: vseleq.f16 s13, s1, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: vcmp.f16 s18, #0 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q3[4], 
r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: vmovx.f16 s20, s2 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s10 ; CHECK-MVE-NEXT: vmov.f32 s22, s20 -; CHECK-MVE-NEXT: vmla.f16 s22, s18, s16 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmla.f16 s22, s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s6, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s20, s22 -; CHECK-MVE-NEXT: vcmp.f16 s7, #0 -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s16, s3 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmla.f16 s16, s7, s11 +; CHECK-MVE-NEXT: vmov.f32 s18, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vseleq.f16 s16, s3, s16 +; CHECK-MVE-NEXT: vmla.f16 s18, s6, s10 +; CHECK-MVE-NEXT: vseleq.f16 s14, s2, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s7 +; CHECK-MVE-NEXT: vcmp.f16 s18, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s20, s3 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s11 +; CHECK-MVE-NEXT: vmov.f32 s22, s20 +; CHECK-MVE-NEXT: vmla.f16 s22, s18, s16 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s7, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s20, s22 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 -; CHECK-MVE-NEXT: vmov.f32 s2, s0 +; 
CHECK-MVE-NEXT: vmov.f32 s18, s3 +; CHECK-MVE-NEXT: vmla.f16 s18, s7, s11 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmla.f16 s2, s4, s8 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s2 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s3, s18 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -779,50 +669,33 @@ ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s4 ; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s14, #0 +; CHECK-MVE-NEXT: vmovx.f16 s13, s0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vmov.f32 s12, s0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s14, s4 -; CHECK-MVE-NEXT: vmls.f16 s12, s4, s8 -; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s14, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s0, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmovx.f16 s13, s0 -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmovx.f16 s12, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmov.f32 s15, s13 -; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmls.f16 s15, s14, s12 -; CHECK-MVE-NEXT: vcmp.f16 s5, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15 +; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s13, s15 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 -; CHECK-MVE-NEXT: vmov.16 q3[0], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 -; CHECK-MVE-NEXT: 
vmov.f32 s16, s1 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmls.f16 s16, s5, s9 -; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmov.f32 s12, s0 ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 -; CHECK-MVE-NEXT: vseleq.f16 s16, s1, s16 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmls.f16 s12, s4, s8 ; CHECK-MVE-NEXT: vcmp.f16 s18, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmov.16 q3[1], r2 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vseleq.f16 s12, s0, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vmovx.f16 s20, s1 @@ -830,71 +703,76 @@ ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s9 ; CHECK-MVE-NEXT: vmov.f32 s22, s20 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmls.f16 s22, s18, s16 -; CHECK-MVE-NEXT: vcmp.f16 s6, #0 +; CHECK-MVE-NEXT: vcmp.f16 s5, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s20, s22 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s6 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s16, s2 -; CHECK-MVE-NEXT: vmls.f16 s16, s6, s10 +; CHECK-MVE-NEXT: vmov.f32 s18, s1 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s2, s16 +; CHECK-MVE-NEXT: vmls.f16 s18, s5, s9 +; CHECK-MVE-NEXT: vseleq.f16 s13, s1, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: vcmp.f16 s18, #0 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; 
CHECK-MVE-NEXT: vmovx.f16 s20, s2 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s10 ; CHECK-MVE-NEXT: vmov.f32 s22, s20 -; CHECK-MVE-NEXT: vmls.f16 s22, s18, s16 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmls.f16 s22, s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s6, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s20, s22 -; CHECK-MVE-NEXT: vcmp.f16 s7, #0 -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s16, s3 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmls.f16 s16, s7, s11 +; CHECK-MVE-NEXT: vmov.f32 s18, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vseleq.f16 s16, s3, s16 +; CHECK-MVE-NEXT: vmls.f16 s18, s6, s10 +; CHECK-MVE-NEXT: vseleq.f16 s14, s2, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s7 +; CHECK-MVE-NEXT: vcmp.f16 s18, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s20, s3 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s11 +; CHECK-MVE-NEXT: vmov.f32 s22, s20 +; CHECK-MVE-NEXT: vmls.f16 s22, s18, s16 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s7, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s20, s22 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 -; CHECK-MVE-NEXT: vmov.f32 s2, s0 +; CHECK-MVE-NEXT: vmov.f32 s18, s3 +; CHECK-MVE-NEXT: vmls.f16 s18, s7, s11 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: 
vmls.f16 s2, s4, s8 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s2 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s3, s18 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -927,119 +805,107 @@ ; ; CHECK-MVE-LABEL: vfmar16_pred: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vcvtb.f16.f32 s12, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s4 +; CHECK-MVE-NEXT: vcmp.f16 s8, #0 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcvtb.f16.f32 s12, s8 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s8, s0 -; CHECK-MVE-NEXT: vmla.f16 s8, s4, s12 -; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s8, s0, s8 -; CHECK-MVE-NEXT: movs r2, #0 -; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s4 -; CHECK-MVE-NEXT: vcmp.f16 s8, #0 ; CHECK-MVE-NEXT: vmovx.f16 s10, s0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmov.f32 s14, s10 -; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmla.f16 s14, s8, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s5, #0 -; CHECK-MVE-NEXT: vseleq.f16 s8, s10, s14 +; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s14 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s8 -; CHECK-MVE-NEXT: vmov.16 q2[0], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 -; CHECK-MVE-NEXT: vmov.f32 s14, s1 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmla.f16 s14, s5, s12 +; 
CHECK-MVE-NEXT: vmov.f32 s8, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmov.16 q2[1], r2 -; CHECK-MVE-NEXT: vseleq.f16 s14, s1, s14 -; CHECK-MVE-NEXT: vmovx.f16 s13, s1 -; CHECK-MVE-NEXT: vmov r1, s14 +; CHECK-MVE-NEXT: vmla.f16 s8, s4, s12 +; CHECK-MVE-NEXT: vseleq.f16 s8, s0, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s8, s14 ; CHECK-MVE-NEXT: vmovx.f16 s14, s5 ; CHECK-MVE-NEXT: vcmp.f16 s14, #0 -; CHECK-MVE-NEXT: vmov.16 q2[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s13, s1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmov.f32 s15, s13 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s5, #0 ; CHECK-MVE-NEXT: vseleq.f16 s14, s13, s15 -; CHECK-MVE-NEXT: vcmp.f16 s6, #0 -; CHECK-MVE-NEXT: vmov r1, s14 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q2[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.f32 s13, s1 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s14, s2 -; CHECK-MVE-NEXT: vmla.f16 s14, s6, s12 +; CHECK-MVE-NEXT: vmla.f16 s13, s5, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s14, s2, s14 -; CHECK-MVE-NEXT: vmovx.f16 s13, s2 -; CHECK-MVE-NEXT: vmov r1, s14 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s9, s1, s13 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s9, s14 ; CHECK-MVE-NEXT: vmovx.f16 s14, s6 ; CHECK-MVE-NEXT: vcmp.f16 s14, #0 -; CHECK-MVE-NEXT: vmov.16 q2[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s13, s2 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: 
vmov.f32 s15, s13 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s6, #0 ; CHECK-MVE-NEXT: vseleq.f16 s14, s13, s15 -; CHECK-MVE-NEXT: vcmp.f16 s7, #0 -; CHECK-MVE-NEXT: vmov r1, s14 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q2[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.f32 s13, s2 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s14, s3 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmla.f16 s14, s7, s12 +; CHECK-MVE-NEXT: vmla.f16 s13, s6, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vseleq.f16 s14, s3, s14 -; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s10, s2, s13 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s10, s14 +; CHECK-MVE-NEXT: vmovx.f16 s14, s7 +; CHECK-MVE-NEXT: vcmp.f16 s14, #0 +; CHECK-MVE-NEXT: vmovx.f16 s13, s3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.f32 s15, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s7, #0 +; CHECK-MVE-NEXT: vseleq.f16 s14, s13, s15 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.f32 s2, s0 -; CHECK-MVE-NEXT: vmla.f16 s2, s4, s12 +; CHECK-MVE-NEXT: vmov.f32 s13, s3 +; CHECK-MVE-NEXT: vmla.f16 s13, s7, s12 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov r1, s14 -; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s2 -; CHECK-MVE-NEXT: vmov.16 q2[6], r1 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q2[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s11, s3, s13 
+; CHECK-MVE-NEXT: vins.f16 s11, s14 ; CHECK-MVE-NEXT: vmov q0, q2 ; CHECK-MVE-NEXT: bx lr entry: @@ -1073,118 +939,106 @@ ; ; CHECK-MVE-LABEL: vfma16_pred: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vmovx.f16 s10, s4 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f16 s10, #0 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s12, s8 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmov.f32 s8, s12 -; CHECK-MVE-NEXT: vmla.f16 s8, s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s8, s0, s8 -; CHECK-MVE-NEXT: movs r2, #0 -; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s4 -; CHECK-MVE-NEXT: vcmp.f16 s8, #0 -; CHECK-MVE-NEXT: vmovx.f16 s10, s0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vmov.f32 s14, s12 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmla.f16 s14, s10, s8 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s5, #0 -; CHECK-MVE-NEXT: vseleq.f16 s8, s10, s14 +; CHECK-MVE-NEXT: vmla.f16 s8, s14, s10 +; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vseleq.f16 s14, s14, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s8 -; CHECK-MVE-NEXT: vmov.16 q2[0], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 -; CHECK-MVE-NEXT: vmov.f32 s14, s12 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmla.f16 s14, s1, s5 +; CHECK-MVE-NEXT: vmov.f32 s8, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmov.16 q2[1], r2 -; CHECK-MVE-NEXT: vseleq.f16 s14, s1, s14 -; CHECK-MVE-NEXT: vmovx.f16 s13, s1 -; CHECK-MVE-NEXT: vmov r1, s14 +; CHECK-MVE-NEXT: vmla.f16 s8, s0, s4 +; CHECK-MVE-NEXT: 
vseleq.f16 s8, s0, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s8, s14 ; CHECK-MVE-NEXT: vmovx.f16 s14, s5 ; CHECK-MVE-NEXT: vcmp.f16 s14, #0 -; CHECK-MVE-NEXT: vmov.16 q2[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s13, s1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmov.f32 s15, s12 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmla.f16 s15, s13, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s5, #0 ; CHECK-MVE-NEXT: vseleq.f16 s14, s13, s15 -; CHECK-MVE-NEXT: vcmp.f16 s6, #0 -; CHECK-MVE-NEXT: vmov r1, s14 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q2[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.f32 s13, s12 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s14, s12 -; CHECK-MVE-NEXT: vmla.f16 s14, s2, s6 +; CHECK-MVE-NEXT: vmla.f16 s13, s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s14, s2, s14 -; CHECK-MVE-NEXT: vmovx.f16 s13, s2 -; CHECK-MVE-NEXT: vmov r1, s14 +; CHECK-MVE-NEXT: vmov.f32 s15, s12 +; CHECK-MVE-NEXT: vseleq.f16 s9, s1, s13 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s9, s14 ; CHECK-MVE-NEXT: vmovx.f16 s14, s6 ; CHECK-MVE-NEXT: vcmp.f16 s14, #0 -; CHECK-MVE-NEXT: vmov.16 q2[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s13, s2 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s15, s12 ; CHECK-MVE-NEXT: vmla.f16 s15, s13, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s6, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s14, s13, s15 -; CHECK-MVE-NEXT: vcmp.f16 s7, #0 
-; CHECK-MVE-NEXT: vmov r1, s14 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q2[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.f32 s13, s12 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmov.f32 s14, s12 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmla.f16 s14, s3, s7 +; CHECK-MVE-NEXT: vmla.f16 s13, s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vseleq.f16 s14, s3, s14 -; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmov.f32 s15, s12 +; CHECK-MVE-NEXT: vseleq.f16 s10, s2, s13 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s10, s14 +; CHECK-MVE-NEXT: vmovx.f16 s14, s7 +; CHECK-MVE-NEXT: vcmp.f16 s14, #0 +; CHECK-MVE-NEXT: vmovx.f16 s13, s3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmla.f16 s15, s13, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s7, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmla.f16 s12, s3, s7 +; CHECK-MVE-NEXT: vseleq.f16 s14, s13, s15 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmla.f16 s12, s0, s4 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov r1, s14 -; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s12 -; CHECK-MVE-NEXT: vmov.16 q2[6], r1 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q2[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s11, s3, s12 +; CHECK-MVE-NEXT: vins.f16 s11, s14 ; CHECK-MVE-NEXT: vmov q0, q2 ; CHECK-MVE-NEXT: bx lr entry: Index: llvm/test/CodeGen/Thumb2/mve-fmath.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -19,35 
+19,23 @@ define arm_aapcs_vfpcc <8 x half> @sqrt_float16_t(<8 x half> %src) { ; CHECK-LABEL: sqrt_float16_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vsqrt.f16 s8, s1 -; CHECK-NEXT: vsqrt.f16 s4, s4 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vsqrt.f16 s4, s0 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmovx.f16 s0, s3 -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vsqrt.f16 s0, s0 -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s1 -; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmovx.f16 s0, s4 +; CHECK-NEXT: vsqrt.f16 s8, s0 +; CHECK-NEXT: vsqrt.f16 s0, s4 +; CHECK-NEXT: vins.f16 s0, s8 +; CHECK-NEXT: vmovx.f16 s8, s5 ; CHECK-NEXT: vsqrt.f16 s8, s8 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vsqrt.f16 s8, s2 -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s2 -; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vsqrt.f16 s1, s5 +; CHECK-NEXT: vins.f16 s1, s8 +; CHECK-NEXT: vmovx.f16 s8, s6 ; CHECK-NEXT: vsqrt.f16 s8, s8 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vsqrt.f16 s8, s3 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vsqrt.f16 s2, s6 +; CHECK-NEXT: vins.f16 s2, s8 +; CHECK-NEXT: vmovx.f16 s8, s7 +; CHECK-NEXT: vsqrt.f16 s8, s8 +; CHECK-NEXT: vsqrt.f16 s3, s7 +; CHECK-NEXT: vins.f16 s3, s8 ; CHECK-NEXT: bx lr entry: %0 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %src) @@ -1037,93 +1025,81 @@ ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 ; CHECK-NEXT: vmovx.f16 s8, s4 -; CHECK-NEXT: vstr.16 s4, [sp, #28] ; CHECK-NEXT: vstr.16 s8, [sp, #24] ; CHECK-NEXT: vmovx.f16 s8, s5 -; CHECK-NEXT: vstr.16 s5, [sp, #20] +; CHECK-NEXT: vstr.16 s4, [sp, #28] ; CHECK-NEXT: vstr.16 s8, [sp, #16] ; CHECK-NEXT: vmovx.f16 s8, s6 -; CHECK-NEXT: vmovx.f16 s4, s7 -; CHECK-NEXT: vstr.16 s6, [sp, 
#12] +; CHECK-NEXT: vstr.16 s5, [sp, #20] ; CHECK-NEXT: vstr.16 s8, [sp, #8] +; CHECK-NEXT: vmovx.f16 s8, s7 +; CHECK-NEXT: vstr.16 s6, [sp, #12] +; CHECK-NEXT: vstr.16 s8, [sp] ; CHECK-NEXT: vstr.16 s7, [sp, #4] -; CHECK-NEXT: vstr.16 s4, [sp] -; CHECK-NEXT: ldrb.w r0, [sp, #29] -; CHECK-NEXT: vabs.f16 s4, s0 +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: ldrb.w r0, [sp, #25] +; CHECK-NEXT: vabs.f16 s4, s4 ; CHECK-NEXT: vneg.f16 s6, s4 -; CHECK-NEXT: ldrb.w r1, [sp, #25] ; CHECK-NEXT: tst.w r0, #128 -; CHECK-NEXT: vabs.f16 s8, s1 ; CHECK-NEXT: cset r0, ne -; CHECK-NEXT: vneg.f16 s10, s8 ; CHECK-NEXT: lsls r0, r0, #31 -; CHECK-NEXT: vseleq.f16 s4, s4, s6 -; CHECK-NEXT: tst.w r1, #128 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: cset r1, ne -; CHECK-NEXT: vabs.f16 s4, s4 -; CHECK-NEXT: vneg.f16 s6, s4 -; CHECK-NEXT: lsls r1, r1, #31 -; CHECK-NEXT: vseleq.f16 s4, s4, s6 -; CHECK-NEXT: vmovx.f16 s0, s3 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: ldrb.w r0, [sp, #21] -; CHECK-NEXT: vmov.16 q1[1], r1 -; CHECK-NEXT: vabs.f16 s0, s0 +; CHECK-NEXT: ldrb.w r0, [sp, #29] +; CHECK-NEXT: vseleq.f16 s8, s4, s6 +; CHECK-NEXT: vabs.f16 s4, s0 +; CHECK-NEXT: vabs.f16 s0, s3 ; CHECK-NEXT: tst.w r0, #128 +; CHECK-NEXT: vneg.f16 s6, s4 ; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: lsls r0, r0, #31 -; CHECK-NEXT: vseleq.f16 s8, s8, s10 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s1 -; CHECK-NEXT: vmov.16 q1[2], r0 ; CHECK-NEXT: ldrb.w r0, [sp, #17] -; CHECK-NEXT: vabs.f16 s8, s8 +; CHECK-NEXT: vseleq.f16 s4, s4, s6 +; CHECK-NEXT: vins.f16 s4, s8 +; CHECK-NEXT: vmovx.f16 s8, s1 ; CHECK-NEXT: tst.w r0, #128 -; CHECK-NEXT: vneg.f16 s10, s8 +; CHECK-NEXT: vabs.f16 s8, s8 ; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: vneg.f16 s10, s8 ; CHECK-NEXT: lsls r0, r0, #31 +; CHECK-NEXT: ldrb.w r0, [sp, #21] ; CHECK-NEXT: vseleq.f16 s8, s8, s10 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vabs.f16 s8, s2 -; CHECK-NEXT: vmov.16 q1[3], r0 -; 
CHECK-NEXT: ldrb.w r0, [sp, #13] -; CHECK-NEXT: vneg.f16 s10, s8 +; CHECK-NEXT: vabs.f16 s10, s1 ; CHECK-NEXT: tst.w r0, #128 +; CHECK-NEXT: vneg.f16 s12, s10 ; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: lsls r0, r0, #31 -; CHECK-NEXT: vseleq.f16 s8, s8, s10 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s2 -; CHECK-NEXT: vmov.16 q1[4], r0 ; CHECK-NEXT: ldrb.w r0, [sp, #9] +; CHECK-NEXT: vseleq.f16 s5, s10, s12 +; CHECK-NEXT: vins.f16 s5, s8 +; CHECK-NEXT: vmovx.f16 s8, s2 +; CHECK-NEXT: tst.w r0, #128 ; CHECK-NEXT: vabs.f16 s8, s8 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: vneg.f16 s10, s8 +; CHECK-NEXT: lsls r0, r0, #31 +; CHECK-NEXT: ldrb.w r0, [sp, #13] +; CHECK-NEXT: vseleq.f16 s8, s8, s10 +; CHECK-NEXT: vabs.f16 s10, s2 ; CHECK-NEXT: vneg.f16 s2, s0 ; CHECK-NEXT: tst.w r0, #128 -; CHECK-NEXT: vneg.f16 s10, s8 +; CHECK-NEXT: vneg.f16 s12, s10 ; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: lsls r0, r0, #31 -; CHECK-NEXT: vseleq.f16 s8, s8, s10 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vabs.f16 s8, s3 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: ldrb.w r0, [sp, #5] -; CHECK-NEXT: vneg.f16 s10, s8 +; CHECK-NEXT: ldrb.w r0, [sp, #1] +; CHECK-NEXT: vseleq.f16 s6, s10, s12 +; CHECK-NEXT: vins.f16 s6, s8 +; CHECK-NEXT: vmovx.f16 s8, s3 ; CHECK-NEXT: tst.w r0, #128 +; CHECK-NEXT: vabs.f16 s8, s8 ; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: vneg.f16 s10, s8 ; CHECK-NEXT: lsls r0, r0, #31 +; CHECK-NEXT: ldrb.w r0, [sp, #5] ; CHECK-NEXT: vseleq.f16 s8, s8, s10 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: ldrb.w r0, [sp, #1] ; CHECK-NEXT: tst.w r0, #128 ; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: lsls r0, r0, #31 -; CHECK-NEXT: vseleq.f16 s0, s0, s2 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vseleq.f16 s7, s0, s2 +; CHECK-NEXT: vins.f16 s7, s8 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: bx lr Index: llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll 
=================================================================== --- llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll +++ llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll @@ -5,35 +5,23 @@ define arm_aapcs_vfpcc <8 x half> @fneg_float16_t(<8 x half> %src) { ; CHECK-MVE-LABEL: fneg_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmovx.f16 s4, s0 -; CHECK-MVE-NEXT: vneg.f16 s8, s1 -; CHECK-MVE-NEXT: vneg.f16 s4, s4 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vneg.f16 s4, s0 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q1[0], r1 -; CHECK-MVE-NEXT: vneg.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[1], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s1 -; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov q1, q0 +; CHECK-MVE-NEXT: vmovx.f16 s0, s4 +; CHECK-MVE-NEXT: vneg.f16 s8, s0 +; CHECK-MVE-NEXT: vneg.f16 s0, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 ; CHECK-MVE-NEXT: vneg.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vneg.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vneg.f16 s1, s5 +; CHECK-MVE-NEXT: vins.f16 s1, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 ; CHECK-MVE-NEXT: vneg.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vneg.f16 s8, s3 -; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[7], r0 -; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: vneg.f16 s2, s6 +; CHECK-MVE-NEXT: vins.f16 s2, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s7 +; CHECK-MVE-NEXT: vneg.f16 s8, s8 +; CHECK-MVE-NEXT: vneg.f16 s3, s7 +; CHECK-MVE-NEXT: vins.f16 s3, s8 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: fneg_float16_t: @@ -89,35 +77,23 @@ define arm_aapcs_vfpcc <8 x half> @fabs_float16_t(<8 x half> %src) { ; 
CHECK-MVE-LABEL: fabs_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmovx.f16 s4, s0 -; CHECK-MVE-NEXT: vabs.f16 s8, s1 -; CHECK-MVE-NEXT: vabs.f16 s4, s4 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vabs.f16 s4, s0 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q1[0], r1 -; CHECK-MVE-NEXT: vabs.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[1], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s1 -; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov q1, q0 +; CHECK-MVE-NEXT: vmovx.f16 s0, s4 +; CHECK-MVE-NEXT: vabs.f16 s8, s0 +; CHECK-MVE-NEXT: vabs.f16 s0, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 ; CHECK-MVE-NEXT: vabs.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vabs.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vabs.f16 s1, s5 +; CHECK-MVE-NEXT: vins.f16 s1, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 ; CHECK-MVE-NEXT: vabs.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vabs.f16 s8, s3 -; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[7], r0 -; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: vabs.f16 s2, s6 +; CHECK-MVE-NEXT: vins.f16 s2, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s7 +; CHECK-MVE-NEXT: vabs.f16 s8, s8 +; CHECK-MVE-NEXT: vabs.f16 s3, s7 +; CHECK-MVE-NEXT: vins.f16 s3, s8 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: fabs_float16_t: Index: llvm/test/CodeGen/Thumb2/mve-frint.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-frint.ll +++ llvm/test/CodeGen/Thumb2/mve-frint.ll @@ -24,35 +24,23 @@ define arm_aapcs_vfpcc <8 x half> @fceil_float16_t(<8 x half> %src) { ; CHECK-MVE-LABEL: fceil_float16_t: ; CHECK-MVE: @ %bb.0: @ 
%entry -; CHECK-MVE-NEXT: vmovx.f16 s4, s0 -; CHECK-MVE-NEXT: vrintp.f16 s8, s1 -; CHECK-MVE-NEXT: vrintp.f16 s4, s4 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vrintp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q1[0], r1 -; CHECK-MVE-NEXT: vrintp.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[1], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s1 -; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov q1, q0 +; CHECK-MVE-NEXT: vmovx.f16 s0, s4 +; CHECK-MVE-NEXT: vrintp.f16 s8, s0 +; CHECK-MVE-NEXT: vrintp.f16 s0, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 ; CHECK-MVE-NEXT: vrintp.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrintp.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vrintp.f16 s1, s5 +; CHECK-MVE-NEXT: vins.f16 s1, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 ; CHECK-MVE-NEXT: vrintp.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrintp.f16 s8, s3 -; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[7], r0 -; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: vrintp.f16 s2, s6 +; CHECK-MVE-NEXT: vins.f16 s2, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s7 +; CHECK-MVE-NEXT: vrintp.f16 s8, s8 +; CHECK-MVE-NEXT: vrintp.f16 s3, s7 +; CHECK-MVE-NEXT: vins.f16 s3, s8 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: fceil_float16_t: @@ -110,35 +98,23 @@ define arm_aapcs_vfpcc <8 x half> @ftrunc_float16_t(<8 x half> %src) { ; CHECK-MVE-LABEL: ftrunc_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmovx.f16 s4, s0 -; CHECK-MVE-NEXT: vrintz.f16 s8, s1 -; CHECK-MVE-NEXT: vrintz.f16 s4, s4 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vrintz.f16 s4, s0 -; CHECK-MVE-NEXT: vmov r1, s4 -; 
CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q1[0], r1 -; CHECK-MVE-NEXT: vrintz.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[1], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s1 -; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov q1, q0 +; CHECK-MVE-NEXT: vmovx.f16 s0, s4 +; CHECK-MVE-NEXT: vrintz.f16 s8, s0 +; CHECK-MVE-NEXT: vrintz.f16 s0, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 ; CHECK-MVE-NEXT: vrintz.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrintz.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vrintz.f16 s1, s5 +; CHECK-MVE-NEXT: vins.f16 s1, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 ; CHECK-MVE-NEXT: vrintz.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrintz.f16 s8, s3 -; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[7], r0 -; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: vrintz.f16 s2, s6 +; CHECK-MVE-NEXT: vins.f16 s2, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s7 +; CHECK-MVE-NEXT: vrintz.f16 s8, s8 +; CHECK-MVE-NEXT: vrintz.f16 s3, s7 +; CHECK-MVE-NEXT: vins.f16 s3, s8 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: ftrunc_float16_t: @@ -196,35 +172,23 @@ define arm_aapcs_vfpcc <8 x half> @frint_float16_t(<8 x half> %src) { ; CHECK-MVE-LABEL: frint_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmovx.f16 s4, s0 -; CHECK-MVE-NEXT: vrintx.f16 s8, s1 -; CHECK-MVE-NEXT: vrintx.f16 s4, s4 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vrintx.f16 s4, s0 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q1[0], r1 -; CHECK-MVE-NEXT: vrintx.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[1], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s1 -; 
CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov q1, q0 +; CHECK-MVE-NEXT: vmovx.f16 s0, s4 +; CHECK-MVE-NEXT: vrintx.f16 s8, s0 +; CHECK-MVE-NEXT: vrintx.f16 s0, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 ; CHECK-MVE-NEXT: vrintx.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrintx.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vrintx.f16 s1, s5 +; CHECK-MVE-NEXT: vins.f16 s1, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 ; CHECK-MVE-NEXT: vrintx.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrintx.f16 s8, s3 -; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[7], r0 -; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: vrintx.f16 s2, s6 +; CHECK-MVE-NEXT: vins.f16 s2, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s7 +; CHECK-MVE-NEXT: vrintx.f16 s8, s8 +; CHECK-MVE-NEXT: vrintx.f16 s3, s7 +; CHECK-MVE-NEXT: vins.f16 s3, s8 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: frint_float16_t: @@ -277,35 +241,23 @@ define arm_aapcs_vfpcc <8 x half> @fnearbyint_float16_t(<8 x half> %src) { ; CHECK-LABEL: fnearbyint_float16_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vrintr.f16 s8, s1 -; CHECK-NEXT: vrintr.f16 s4, s4 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vrintr.f16 s4, s0 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmovx.f16 s0, s3 -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vrintr.f16 s0, s0 -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s1 -; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmovx.f16 s0, s4 +; CHECK-NEXT: vrintr.f16 s8, s0 +; CHECK-NEXT: vrintr.f16 s0, s4 +; CHECK-NEXT: vins.f16 s0, s8 +; CHECK-NEXT: vmovx.f16 s8, s5 ; CHECK-NEXT: vrintr.f16 s8, s8 -; CHECK-NEXT: vmov 
r0, s8 -; CHECK-NEXT: vrintr.f16 s8, s2 -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s2 -; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vrintr.f16 s1, s5 +; CHECK-NEXT: vins.f16 s1, s8 +; CHECK-NEXT: vmovx.f16 s8, s6 ; CHECK-NEXT: vrintr.f16 s8, s8 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vrintr.f16 s8, s3 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vrintr.f16 s2, s6 +; CHECK-NEXT: vins.f16 s2, s8 +; CHECK-NEXT: vmovx.f16 s8, s7 +; CHECK-NEXT: vrintr.f16 s8, s8 +; CHECK-NEXT: vrintr.f16 s3, s7 +; CHECK-NEXT: vins.f16 s3, s8 ; CHECK-NEXT: bx lr entry: %0 = call fast <8 x half> @llvm.nearbyint.v8f16(<8 x half> %src) @@ -358,35 +310,23 @@ define arm_aapcs_vfpcc <8 x half> @ffloor_float16_t(<8 x half> %src) { ; CHECK-MVE-LABEL: ffloor_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmovx.f16 s4, s0 -; CHECK-MVE-NEXT: vrintm.f16 s8, s1 -; CHECK-MVE-NEXT: vrintm.f16 s4, s4 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vrintm.f16 s4, s0 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q1[0], r1 -; CHECK-MVE-NEXT: vrintm.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[1], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s1 -; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov q1, q0 +; CHECK-MVE-NEXT: vmovx.f16 s0, s4 +; CHECK-MVE-NEXT: vrintm.f16 s8, s0 +; CHECK-MVE-NEXT: vrintm.f16 s0, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 ; CHECK-MVE-NEXT: vrintm.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrintm.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vrintm.f16 s1, s5 +; CHECK-MVE-NEXT: vins.f16 s1, s8 +; CHECK-MVE-NEXT: vmovx.f16 
s8, s6 ; CHECK-MVE-NEXT: vrintm.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrintm.f16 s8, s3 -; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[7], r0 -; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: vrintm.f16 s2, s6 +; CHECK-MVE-NEXT: vins.f16 s2, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s7 +; CHECK-MVE-NEXT: vrintm.f16 s8, s8 +; CHECK-MVE-NEXT: vrintm.f16 s3, s7 +; CHECK-MVE-NEXT: vins.f16 s3, s8 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: ffloor_float16_t: @@ -444,35 +384,23 @@ define arm_aapcs_vfpcc <8 x half> @fround_float16_t(<8 x half> %src) { ; CHECK-MVE-LABEL: fround_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmovx.f16 s4, s0 -; CHECK-MVE-NEXT: vrinta.f16 s8, s1 -; CHECK-MVE-NEXT: vrinta.f16 s4, s4 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vrinta.f16 s4, s0 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q1[0], r1 -; CHECK-MVE-NEXT: vrinta.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[1], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s1 -; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov q1, q0 +; CHECK-MVE-NEXT: vmovx.f16 s0, s4 +; CHECK-MVE-NEXT: vrinta.f16 s8, s0 +; CHECK-MVE-NEXT: vrinta.f16 s0, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 ; CHECK-MVE-NEXT: vrinta.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrinta.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vrinta.f16 s1, s5 +; CHECK-MVE-NEXT: vins.f16 s1, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 ; CHECK-MVE-NEXT: vrinta.f16 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vrinta.f16 s8, s3 -; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; 
CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[7], r0 -; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: vrinta.f16 s2, s6 +; CHECK-MVE-NEXT: vins.f16 s2, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s7 +; CHECK-MVE-NEXT: vrinta.f16 s8, s8 +; CHECK-MVE-NEXT: vrinta.f16 s3, s7 +; CHECK-MVE-NEXT: vins.f16 s3, s8 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: fround_float16_t: Index: llvm/test/CodeGen/Thumb2/mve-gather-ind16-scaled.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-ind16-scaled.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-ind16-scaled.ll @@ -95,40 +95,28 @@ ; CHECK-NEXT: vshl.i32 q0, q0, #1 ; CHECK-NEXT: vadd.i32 q1, q0, r0 ; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: vldr.16 s0, [r2] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vldr.16 s0, [r3] -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov.16 q0[0], r3 -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: vldr.16 s8, [r2] -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vmov r2, s7 -; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: vldr.16 s0, [r2] +; CHECK-NEXT: vmov r2, s7 +; CHECK-NEXT: vins.f16 s0, s8 +; CHECK-NEXT: vldr.16 s8, [r2] +; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: vldrh.s32 q1, [r1, #8] -; CHECK-NEXT: vmov.16 q0[3], r2 +; CHECK-NEXT: vldr.16 s1, [r2] ; CHECK-NEXT: vshl.i32 q1, q1, #1 ; CHECK-NEXT: vadd.i32 q1, q1, r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vldr.16 s8, [r0] -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q0[4], r0 +; CHECK-NEXT: vins.f16 s1, s8 ; CHECK-NEXT: vmov r0, s5 ; CHECK-NEXT: vldr.16 s8, [r0] -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov r0, s6 -; CHECK-NEXT: vldr.16 s8, [r0] -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vldr.16 s2, [r0] 
+; CHECK-NEXT: vmov r0, s7 +; CHECK-NEXT: vins.f16 s2, s8 +; CHECK-NEXT: vldr.16 s8, [r0] +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: vldr.16 s3, [r0] +; CHECK-NEXT: vins.f16 s3, s8 ; CHECK-NEXT: bx lr entry: %offs = load <8 x i16>, <8 x i16>* %offptr, align 2 Index: llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll @@ -376,38 +376,26 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vmov r1, s5 -; CHECK-NEXT: vldr.16 s0, [r1] -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vldr.16 s0, [r2] -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmov.16 q0[1], r1 -; CHECK-NEXT: vmov r1, s6 ; CHECK-NEXT: vldr.16 s8, [r1] -; CHECK-NEXT: vmov r1, s8 -; CHECK-NEXT: vmov.16 q0[2], r1 -; CHECK-NEXT: vmov r1, s7 -; CHECK-NEXT: vldr.16 s4, [r1] ; CHECK-NEXT: vmov r1, s4 +; CHECK-NEXT: vldr.16 s0, [r1] +; CHECK-NEXT: vmov r1, s7 +; CHECK-NEXT: vins.f16 s0, s8 +; CHECK-NEXT: vldr.16 s8, [r1] +; CHECK-NEXT: vmov r1, s6 ; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vmov.16 q0[3], r1 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vldr.16 s8, [r0] -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q0[4], r0 +; CHECK-NEXT: vldr.16 s1, [r1] ; CHECK-NEXT: vmov r0, s5 +; CHECK-NEXT: vins.f16 s1, s8 ; CHECK-NEXT: vldr.16 s8, [r0] -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov r0, s6 -; CHECK-NEXT: vldr.16 s8, [r0] -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vldr.16 s2, [r0] +; CHECK-NEXT: vmov r0, s7 +; CHECK-NEXT: vins.f16 s2, s8 +; CHECK-NEXT: vldr.16 s8, [r0] +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: vldr.16 s3, [r0] +; CHECK-NEXT: vins.f16 s3, s8 ; CHECK-NEXT: bx lr entry: %offs = 
load <8 x half*>, <8 x half*>* %offptr, align 4 Index: llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -823,85 +823,45 @@ ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vldrh.s32 q0, [r1] -; CHECK-NEXT: mov.w lr, #0 -; CHECK-NEXT: @ implicit-def: $q1 ; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: vmrs r3, p0 -; CHECK-NEXT: and r1, r3, #1 +; CHECK-NEXT: @ implicit-def: $q0 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: and r1, lr, #1 +; CHECK-NEXT: ubfx r3, lr, #4, #1 ; CHECK-NEXT: rsb.w r12, r1, #0 -; CHECK-NEXT: ubfx r1, r3, #4, #1 -; CHECK-NEXT: bfi lr, r12, #0, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi lr, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r3, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi lr, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r3, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi lr, r1, #3, #1 -; CHECK-NEXT: lsls.w r1, lr, #31 -; CHECK-NEXT: beq .LBB18_2 -; CHECK-NEXT: @ %bb.1: @ %cond.load -; CHECK-NEXT: vldr.16 s4, [r2] -; CHECK-NEXT: .LBB18_2: @ %else -; CHECK-NEXT: lsls.w r1, lr, #30 -; CHECK-NEXT: bpl .LBB18_6 -; CHECK-NEXT: @ %bb.3: @ %cond.load1 -; CHECK-NEXT: vldr.16 s0, [r2, #2] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmovx.f16 s4, s5 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vmov.16 q0[0], r3 -; CHECK-NEXT: vmov.16 q0[1], r1 -; CHECK-NEXT: vmov r1, s5 -; CHECK-NEXT: vmov.16 q0[2], r1 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmov.16 q0[3], r1 -; CHECK-NEXT: lsls.w r1, lr, #29 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r12, #0, #1 +; CHECK-NEXT: bfi r1, r3, #1, #1 +; CHECK-NEXT: ubfx r3, lr, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #2, #1 +; CHECK-NEXT: ubfx r3, lr, #12, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #3, #1 +; CHECK-NEXT: lsls r3, r1, #31 +; CHECK-NEXT: bne 
.LBB18_6 +; CHECK-NEXT: @ %bb.1: @ %else +; CHECK-NEXT: lsls r3, r1, #30 ; CHECK-NEXT: bmi .LBB18_7 -; CHECK-NEXT: .LBB18_4: -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: lsls.w r1, lr, #28 +; CHECK-NEXT: .LBB18_2: @ %else2 +; CHECK-NEXT: lsls r3, r1, #29 ; CHECK-NEXT: bmi .LBB18_8 -; CHECK-NEXT: .LBB18_5: -; CHECK-NEXT: vmov q2, q1 -; CHECK-NEXT: b .LBB18_9 -; CHECK-NEXT: .LBB18_6: -; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: lsls.w r1, lr, #29 -; CHECK-NEXT: bpl .LBB18_4 -; CHECK-NEXT: .LBB18_7: @ %cond.load4 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vldr.16 s8, [r2, #4] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmovx.f16 s0, s1 -; CHECK-NEXT: vmov.16 q1[1], r3 -; CHECK-NEXT: vmov r1, s8 -; CHECK-NEXT: vmov.16 q1[2], r1 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vmov.16 q1[3], r1 -; CHECK-NEXT: lsls.w r1, lr, #28 +; CHECK-NEXT: .LBB18_3: @ %else5 +; CHECK-NEXT: lsls r1, r1, #28 ; CHECK-NEXT: bpl .LBB18_5 -; CHECK-NEXT: .LBB18_8: @ %cond.load7 -; CHECK-NEXT: vmovx.f16 s0, s4 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vmov.16 q2[0], r3 -; CHECK-NEXT: vldr.16 s0, [r2, #6] -; CHECK-NEXT: vmov.16 q2[1], r1 -; CHECK-NEXT: vmov r1, s5 -; CHECK-NEXT: vmov.16 q2[2], r1 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vmov.16 q2[3], r1 -; CHECK-NEXT: .LBB18_9: @ %else8 +; CHECK-NEXT: .LBB18_4: @ %cond.load7 +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vldr.16 s4, [r2, #6] +; CHECK-NEXT: vins.f16 s1, s4 +; CHECK-NEXT: .LBB18_5: @ %else8 ; CHECK-NEXT: vmrs r2, p0 ; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vcvtt.f32.f16 s3, s9 -; CHECK-NEXT: vcvtb.f32.f16 s2, s9 -; CHECK-NEXT: vcvtt.f32.f16 s1, s8 -; CHECK-NEXT: vcvtb.f32.f16 s0, s8 +; CHECK-NEXT: vcvtt.f32.f16 s7, s1 +; CHECK-NEXT: vcvtb.f32.f16 s6, s1 +; CHECK-NEXT: vcvtt.f32.f16 s5, s0 +; CHECK-NEXT: vcvtb.f32.f16 s4, s0 ; CHECK-NEXT: and r3, r2, #1 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: bfi r1, r3, #0, #1 @@ 
-916,22 +876,42 @@ ; CHECK-NEXT: bfi r1, r2, #3, #1 ; CHECK-NEXT: lsls r2, r1, #31 ; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s0 +; CHECK-NEXT: vmovne r2, s4 ; CHECK-NEXT: strne r2, [r0] ; CHECK-NEXT: lsls r2, r1, #30 ; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s1 +; CHECK-NEXT: vmovmi r2, s5 ; CHECK-NEXT: strmi r2, [r0, #4] ; CHECK-NEXT: lsls r2, r1, #29 ; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s2 +; CHECK-NEXT: vmovmi r2, s6 ; CHECK-NEXT: strmi r2, [r0, #8] ; CHECK-NEXT: lsls r1, r1, #28 ; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s3 +; CHECK-NEXT: vmovmi r1, s7 ; CHECK-NEXT: strmi r1, [r0, #12] ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .LBB18_6: @ %cond.load +; CHECK-NEXT: vldr.16 s0, [r2] +; CHECK-NEXT: lsls r3, r1, #30 +; CHECK-NEXT: bpl .LBB18_2 +; CHECK-NEXT: .LBB18_7: @ %cond.load1 +; CHECK-NEXT: vldr.16 s4, [r2, #2] +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vmovx.f16 s4, s1 +; CHECK-NEXT: vins.f16 s1, s4 +; CHECK-NEXT: lsls r3, r1, #29 +; CHECK-NEXT: bpl .LBB18_3 +; CHECK-NEXT: .LBB18_8: @ %cond.load4 +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vmovx.f16 s4, s1 +; CHECK-NEXT: vldr.16 s1, [r2, #4] +; CHECK-NEXT: vins.f16 s1, s4 +; CHECK-NEXT: lsls r1, r1, #28 +; CHECK-NEXT: bmi .LBB18_4 +; CHECK-NEXT: b .LBB18_5 entry: %0 = load <4 x i16>, <4 x i16>* %mask, align 2 %1 = icmp sgt <4 x i16> %0, zeroinitializer @@ -949,85 +929,45 @@ ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vldrh.s32 q0, [r1] -; CHECK-NEXT: mov.w lr, #0 -; CHECK-NEXT: @ implicit-def: $q1 ; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: vmrs r3, p0 -; CHECK-NEXT: and r1, r3, #1 +; CHECK-NEXT: @ implicit-def: $q0 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: and r1, lr, #1 +; CHECK-NEXT: ubfx r3, lr, #4, #1 ; CHECK-NEXT: rsb.w r12, r1, #0 -; CHECK-NEXT: ubfx r1, r3, #4, #1 -; CHECK-NEXT: bfi lr, r12, #0, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi lr, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r3, #8, 
#1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi lr, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r3, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi lr, r1, #3, #1 -; CHECK-NEXT: lsls.w r1, lr, #31 -; CHECK-NEXT: beq .LBB19_2 -; CHECK-NEXT: @ %bb.1: @ %cond.load -; CHECK-NEXT: vldr.16 s4, [r2] -; CHECK-NEXT: .LBB19_2: @ %else -; CHECK-NEXT: lsls.w r1, lr, #30 -; CHECK-NEXT: bpl .LBB19_6 -; CHECK-NEXT: @ %bb.3: @ %cond.load1 -; CHECK-NEXT: vldr.16 s0, [r2, #2] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmovx.f16 s4, s5 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vmov.16 q0[0], r3 -; CHECK-NEXT: vmov.16 q0[1], r1 -; CHECK-NEXT: vmov r1, s5 -; CHECK-NEXT: vmov.16 q0[2], r1 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmov.16 q0[3], r1 -; CHECK-NEXT: lsls.w r1, lr, #29 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r12, #0, #1 +; CHECK-NEXT: bfi r1, r3, #1, #1 +; CHECK-NEXT: ubfx r3, lr, #8, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #2, #1 +; CHECK-NEXT: ubfx r3, lr, #12, #1 +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: bfi r1, r3, #3, #1 +; CHECK-NEXT: lsls r3, r1, #31 +; CHECK-NEXT: bne .LBB19_6 +; CHECK-NEXT: @ %bb.1: @ %else +; CHECK-NEXT: lsls r3, r1, #30 ; CHECK-NEXT: bmi .LBB19_7 -; CHECK-NEXT: .LBB19_4: -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: lsls.w r1, lr, #28 +; CHECK-NEXT: .LBB19_2: @ %else2 +; CHECK-NEXT: lsls r3, r1, #29 ; CHECK-NEXT: bmi .LBB19_8 -; CHECK-NEXT: .LBB19_5: -; CHECK-NEXT: vmov q2, q1 -; CHECK-NEXT: b .LBB19_9 -; CHECK-NEXT: .LBB19_6: -; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: lsls.w r1, lr, #29 -; CHECK-NEXT: bpl .LBB19_4 -; CHECK-NEXT: .LBB19_7: @ %cond.load4 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vldr.16 s8, [r2, #4] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmovx.f16 s0, s1 -; CHECK-NEXT: vmov.16 q1[1], r3 -; CHECK-NEXT: vmov r1, s8 -; CHECK-NEXT: vmov.16 q1[2], r1 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vmov.16 
q1[3], r1 -; CHECK-NEXT: lsls.w r1, lr, #28 +; CHECK-NEXT: .LBB19_3: @ %else5 +; CHECK-NEXT: lsls r1, r1, #28 ; CHECK-NEXT: bpl .LBB19_5 -; CHECK-NEXT: .LBB19_8: @ %cond.load7 -; CHECK-NEXT: vmovx.f16 s0, s4 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vmov.16 q2[0], r3 -; CHECK-NEXT: vldr.16 s0, [r2, #6] -; CHECK-NEXT: vmov.16 q2[1], r1 -; CHECK-NEXT: vmov r1, s5 -; CHECK-NEXT: vmov.16 q2[2], r1 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vmov.16 q2[3], r1 -; CHECK-NEXT: .LBB19_9: @ %else8 +; CHECK-NEXT: .LBB19_4: @ %cond.load7 +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vldr.16 s4, [r2, #6] +; CHECK-NEXT: vins.f16 s1, s4 +; CHECK-NEXT: .LBB19_5: @ %else8 ; CHECK-NEXT: vmrs r2, p0 ; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vcvtt.f32.f16 s3, s9 -; CHECK-NEXT: vcvtb.f32.f16 s2, s9 -; CHECK-NEXT: vcvtt.f32.f16 s1, s8 -; CHECK-NEXT: vcvtb.f32.f16 s0, s8 +; CHECK-NEXT: vcvtt.f32.f16 s7, s1 +; CHECK-NEXT: vcvtb.f32.f16 s6, s1 +; CHECK-NEXT: vcvtt.f32.f16 s5, s0 +; CHECK-NEXT: vcvtb.f32.f16 s4, s0 ; CHECK-NEXT: and r3, r2, #1 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: bfi r1, r3, #0, #1 @@ -1042,22 +982,42 @@ ; CHECK-NEXT: bfi r1, r2, #3, #1 ; CHECK-NEXT: lsls r2, r1, #31 ; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s0 +; CHECK-NEXT: vmovne r2, s4 ; CHECK-NEXT: strne r2, [r0] ; CHECK-NEXT: lsls r2, r1, #30 ; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s1 +; CHECK-NEXT: vmovmi r2, s5 ; CHECK-NEXT: strmi r2, [r0, #4] ; CHECK-NEXT: lsls r2, r1, #29 ; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s2 +; CHECK-NEXT: vmovmi r2, s6 ; CHECK-NEXT: strmi r2, [r0, #8] ; CHECK-NEXT: lsls r1, r1, #28 ; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s3 +; CHECK-NEXT: vmovmi r1, s7 ; CHECK-NEXT: strmi r1, [r0, #12] ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .LBB19_6: @ %cond.load +; CHECK-NEXT: vldr.16 s0, [r2] +; CHECK-NEXT: lsls r3, r1, #30 +; CHECK-NEXT: bpl .LBB19_2 +; CHECK-NEXT: .LBB19_7: @ %cond.load1 
+; CHECK-NEXT: vldr.16 s4, [r2, #2] +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vmovx.f16 s4, s1 +; CHECK-NEXT: vins.f16 s1, s4 +; CHECK-NEXT: lsls r3, r1, #29 +; CHECK-NEXT: bpl .LBB19_3 +; CHECK-NEXT: .LBB19_8: @ %cond.load4 +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vmovx.f16 s4, s1 +; CHECK-NEXT: vldr.16 s1, [r2, #4] +; CHECK-NEXT: vins.f16 s1, s4 +; CHECK-NEXT: lsls r1, r1, #28 +; CHECK-NEXT: bmi .LBB19_4 +; CHECK-NEXT: b .LBB19_5 entry: %0 = load <4 x i16>, <4 x i16>* %mask, align 2 %1 = icmp sgt <4 x i16> %0, zeroinitializer Index: llvm/test/CodeGen/Thumb2/mve-minmax.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-minmax.ll +++ llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -315,38 +315,26 @@ define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half> %src2) { ; CHECK-MVE-LABEL: minnm_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vminnm.f16 s8, s4, s0 -; CHECK-MVE-NEXT: vmovx.f16 s10, s4 -; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmovx.f16 s8, s0 -; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8 -; CHECK-MVE-NEXT: vminnm.f16 s12, s5, s1 -; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vmov.16 q2[0], r0 -; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmovx.f16 s10, s4 +; CHECK-MVE-NEXT: vminnm.f16 s12, s10, s8 +; CHECK-MVE-NEXT: vminnm.f16 s8, s4, s0 +; CHECK-MVE-NEXT: vins.f16 s8, s12 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1 ; CHECK-MVE-NEXT: vmovx.f16 s14, s5 -; CHECK-MVE-NEXT: vmov.16 q2[1], r1 +; CHECK-MVE-NEXT: vminnm.f16 s9, s5, s1 ; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[2], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vminnm.f16 s12, s6, s2 -; CHECK-MVE-NEXT: vmov.16 q2[3], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmovx.f16 s12, s2 ; CHECK-MVE-NEXT: vmovx.f16 s14, s6 +; CHECK-MVE-NEXT: vins.f16 s9, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s2 +; CHECK-MVE-NEXT: vminnm.f16 s12, s14, 
s12 +; CHECK-MVE-NEXT: vminnm.f16 s10, s6, s2 +; CHECK-MVE-NEXT: vins.f16 s10, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s3 +; CHECK-MVE-NEXT: vmovx.f16 s14, s7 +; CHECK-MVE-NEXT: vminnm.f16 s11, s7, s3 ; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[4], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmovx.f16 s2, s7 -; CHECK-MVE-NEXT: vminnm.f16 s12, s7, s3 -; CHECK-MVE-NEXT: vmov.16 q2[5], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vminnm.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov.16 q2[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q2[7], r0 +; CHECK-MVE-NEXT: vins.f16 s11, s12 ; CHECK-MVE-NEXT: vmov q0, q2 ; CHECK-MVE-NEXT: bx lr ; Index: llvm/test/CodeGen/Thumb2/mve-shuffle.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -62,23 +62,15 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle1_i16(<8 x i16> %src) { ; CHECK-LABEL: shuffle1_i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vmovx.f16 s4, s3 +; CHECK-NEXT: vins.f16 s4, s3 +; CHECK-NEXT: vmovx.f16 s5, s2 +; CHECK-NEXT: vins.f16 s5, s2 +; CHECK-NEXT: vmovx.f16 s6, s1 +; CHECK-NEXT: vins.f16 s6, s1 +; CHECK-NEXT: vmovx.f16 s7, s0 +; CHECK-NEXT: vins.f16 s7, s0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x i16> %src, <8 x i16> undef, 
<8 x i32> @@ -97,20 +89,16 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) { ; CHECK-LABEL: shuffle3_i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vmov.f32 s0, s6 +; CHECK-NEXT: vmovx.f16 s5, s3 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vins.f16 s5, s3 +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vins.f16 s1, s0 +; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> @@ -345,27 +333,15 @@ define arm_aapcs_vfpcc <8 x half> @shuffle1_f16(<8 x half> %src) { ; CHECK-LABEL: shuffle1_f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovx.f16 s4, s3 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmovx.f16 s8, s2 -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmovx.f16 s8, s1 -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s0 -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmovx.f16 s0, s7 +; CHECK-NEXT: vins.f16 s0, s7 +; CHECK-NEXT: vmovx.f16 s1, s6 +; CHECK-NEXT: vins.f16 s1, s6 +; CHECK-NEXT: vmovx.f16 s2, s5 +; CHECK-NEXT: 
vins.f16 s2, s5 +; CHECK-NEXT: vmovx.f16 s3, s4 +; CHECK-NEXT: vins.f16 s3, s4 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> @@ -384,23 +360,15 @@ define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) { ; CHECK-LABEL: shuffle3_f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovx.f16 s4, s3 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmovx.f16 s8, s1 -; CHECK-NEXT: vmov.16 q1[2], r1 -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s0 -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmovx.f16 s1, s7 +; CHECK-NEXT: vmovx.f16 s8, s4 +; CHECK-NEXT: vins.f16 s1, s7 +; CHECK-NEXT: vmovx.f16 s2, s5 +; CHECK-NEXT: vins.f16 s5, s4 +; CHECK-NEXT: vins.f16 s2, s8 +; CHECK-NEXT: vmov.f32 s3, s5 +; CHECK-NEXT: vmov.f32 s0, s6 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> Index: llvm/test/CodeGen/Thumb2/mve-shufflemov.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-shufflemov.ll +++ llvm/test/CodeGen/Thumb2/mve-shufflemov.ll @@ -35,23 +35,15 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_76543210(<8 x i16> %s1, <8 x i16> %s2) { ; CHECK-LABEL: shuffle_i16_76543210: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: 
vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vmovx.f16 s4, s3 +; CHECK-NEXT: vins.f16 s4, s3 +; CHECK-NEXT: vmovx.f16 s5, s2 +; CHECK-NEXT: vins.f16 s5, s2 +; CHECK-NEXT: vmovx.f16 s6, s1 +; CHECK-NEXT: vins.f16 s6, s1 +; CHECK-NEXT: vmovx.f16 s7, s0 +; CHECK-NEXT: vins.f16 s7, s0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> @@ -346,27 +338,15 @@ define arm_aapcs_vfpcc <8 x half> @shuffle_f16_76543210(<8 x half> %s1, <8 x half> %s2) { ; CHECK-LABEL: shuffle_f16_76543210: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovx.f16 s4, s3 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmovx.f16 s8, s2 -; CHECK-NEXT: vmov.16 q1[0], r1 -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmovx.f16 s8, s1 -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s0 -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmovx.f16 s0, s7 +; CHECK-NEXT: vins.f16 s0, s7 +; CHECK-NEXT: vmovx.f16 s1, s6 +; CHECK-NEXT: vins.f16 s1, s6 +; CHECK-NEXT: vmovx.f16 s2, s5 +; CHECK-NEXT: vins.f16 s2, s5 +; CHECK-NEXT: vmovx.f16 s3, s4 +; CHECK-NEXT: vins.f16 s3, s4 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> Index: llvm/test/CodeGen/Thumb2/mve-simple-arith.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-simple-arith.ll +++ llvm/test/CodeGen/Thumb2/mve-simple-arith.ll @@ -79,38 +79,26 @@ define arm_aapcs_vfpcc <8 x half> @add_float16_t(<8 x half> 
%src1, <8 x half> %src2) { ; CHECK-MVE-LABEL: add_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vadd.f16 s8, s4, s0 -; CHECK-MVE-NEXT: vmovx.f16 s10, s4 -; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmovx.f16 s8, s0 -; CHECK-MVE-NEXT: vadd.f16 s8, s10, s8 -; CHECK-MVE-NEXT: vadd.f16 s12, s5, s1 -; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vmov.16 q2[0], r0 -; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmovx.f16 s10, s4 +; CHECK-MVE-NEXT: vadd.f16 s12, s10, s8 +; CHECK-MVE-NEXT: vadd.f16 s8, s4, s0 +; CHECK-MVE-NEXT: vins.f16 s8, s12 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1 ; CHECK-MVE-NEXT: vmovx.f16 s14, s5 -; CHECK-MVE-NEXT: vmov.16 q2[1], r1 +; CHECK-MVE-NEXT: vadd.f16 s9, s5, s1 ; CHECK-MVE-NEXT: vadd.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[2], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vadd.f16 s12, s6, s2 -; CHECK-MVE-NEXT: vmov.16 q2[3], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmovx.f16 s12, s2 ; CHECK-MVE-NEXT: vmovx.f16 s14, s6 +; CHECK-MVE-NEXT: vins.f16 s9, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s2 ; CHECK-MVE-NEXT: vadd.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[4], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmovx.f16 s2, s7 -; CHECK-MVE-NEXT: vadd.f16 s12, s7, s3 -; CHECK-MVE-NEXT: vmov.16 q2[5], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vadd.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov.16 q2[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q2[7], r0 +; CHECK-MVE-NEXT: vadd.f16 s10, s6, s2 +; CHECK-MVE-NEXT: vins.f16 s10, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s3 +; CHECK-MVE-NEXT: vmovx.f16 s14, s7 +; CHECK-MVE-NEXT: vadd.f16 s11, s7, s3 +; CHECK-MVE-NEXT: vadd.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vins.f16 s11, s12 ; CHECK-MVE-NEXT: vmov q0, q2 ; CHECK-MVE-NEXT: bx lr ; @@ -228,38 +216,26 @@ define arm_aapcs_vfpcc <8 x half> @sub_float16_t(<8 x half> %src1, <8 x half> %src2) { ; CHECK-MVE-LABEL: sub_float16_t: ; CHECK-MVE: @ 
%bb.0: @ %entry -; CHECK-MVE-NEXT: vsub.f16 s8, s4, s0 -; CHECK-MVE-NEXT: vmovx.f16 s10, s4 -; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmovx.f16 s8, s0 -; CHECK-MVE-NEXT: vsub.f16 s8, s10, s8 -; CHECK-MVE-NEXT: vsub.f16 s12, s5, s1 -; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vmov.16 q2[0], r0 -; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmovx.f16 s10, s4 +; CHECK-MVE-NEXT: vsub.f16 s12, s10, s8 +; CHECK-MVE-NEXT: vsub.f16 s8, s4, s0 +; CHECK-MVE-NEXT: vins.f16 s8, s12 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1 ; CHECK-MVE-NEXT: vmovx.f16 s14, s5 -; CHECK-MVE-NEXT: vmov.16 q2[1], r1 +; CHECK-MVE-NEXT: vsub.f16 s9, s5, s1 ; CHECK-MVE-NEXT: vsub.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[2], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vsub.f16 s12, s6, s2 -; CHECK-MVE-NEXT: vmov.16 q2[3], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmovx.f16 s12, s2 ; CHECK-MVE-NEXT: vmovx.f16 s14, s6 +; CHECK-MVE-NEXT: vins.f16 s9, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s2 +; CHECK-MVE-NEXT: vsub.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vsub.f16 s10, s6, s2 +; CHECK-MVE-NEXT: vins.f16 s10, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s3 +; CHECK-MVE-NEXT: vmovx.f16 s14, s7 +; CHECK-MVE-NEXT: vsub.f16 s11, s7, s3 ; CHECK-MVE-NEXT: vsub.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[4], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmovx.f16 s2, s7 -; CHECK-MVE-NEXT: vsub.f16 s12, s7, s3 -; CHECK-MVE-NEXT: vmov.16 q2[5], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vsub.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov.16 q2[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q2[7], r0 +; CHECK-MVE-NEXT: vins.f16 s11, s12 ; CHECK-MVE-NEXT: vmov q0, q2 ; CHECK-MVE-NEXT: bx lr ; @@ -360,38 +336,26 @@ define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) { ; CHECK-MVE-LABEL: mul_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmul.f16 s8, s4, s0 -; CHECK-MVE-NEXT: 
vmovx.f16 s10, s4 -; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmovx.f16 s8, s0 -; CHECK-MVE-NEXT: vmul.f16 s8, s10, s8 -; CHECK-MVE-NEXT: vmul.f16 s12, s5, s1 -; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vmov.16 q2[0], r0 -; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmovx.f16 s10, s4 +; CHECK-MVE-NEXT: vmul.f16 s12, s10, s8 +; CHECK-MVE-NEXT: vmul.f16 s8, s4, s0 +; CHECK-MVE-NEXT: vins.f16 s8, s12 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1 ; CHECK-MVE-NEXT: vmovx.f16 s14, s5 -; CHECK-MVE-NEXT: vmov.16 q2[1], r1 +; CHECK-MVE-NEXT: vmul.f16 s9, s5, s1 ; CHECK-MVE-NEXT: vmul.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[2], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmul.f16 s12, s6, s2 -; CHECK-MVE-NEXT: vmov.16 q2[3], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmovx.f16 s12, s2 ; CHECK-MVE-NEXT: vmovx.f16 s14, s6 +; CHECK-MVE-NEXT: vins.f16 s9, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s2 +; CHECK-MVE-NEXT: vmul.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmul.f16 s10, s6, s2 +; CHECK-MVE-NEXT: vins.f16 s10, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s3 +; CHECK-MVE-NEXT: vmovx.f16 s14, s7 +; CHECK-MVE-NEXT: vmul.f16 s11, s7, s3 ; CHECK-MVE-NEXT: vmul.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmov.16 q2[4], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmovx.f16 s2, s7 -; CHECK-MVE-NEXT: vmul.f16 s12, s7, s3 -; CHECK-MVE-NEXT: vmov.16 q2[5], r0 -; CHECK-MVE-NEXT: vmov r0, s12 -; CHECK-MVE-NEXT: vmul.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov.16 q2[6], r0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q2[7], r0 +; CHECK-MVE-NEXT: vins.f16 s11, s12 ; CHECK-MVE-NEXT: vmov q0, q2 ; CHECK-MVE-NEXT: bx lr ; Index: llvm/test/CodeGen/Thumb2/mve-vcmpf.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vcmpf.ll +++ llvm/test/CodeGen/Thumb2/mve-vcmpf.ll @@ -769,102 +769,90 @@ ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; 
CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs 
APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; 
CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -896,115 +884,103 @@ ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 -; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 +; CHECK-MVE-NEXT: vmovx.f16 s18, s12 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp 
r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s20 -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; 
CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s20 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -1034,102 +1010,90 @@ ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; 
CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset 
r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -1158,102 +1122,90 @@ ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it ge -; CHECK-MVE-NEXT: movge r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; 
CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 
q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -1282,102 +1234,90 @@ ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset 
r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; 
CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 
+; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -1406,102 +1346,90 @@ ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it ls -; CHECK-MVE-NEXT: movls r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: 
vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 
+; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -1533,115 +1461,103 @@ ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 -; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 +; CHECK-MVE-NEXT: vmovx.f16 s18, s12 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: mov.w r1, #0 +; 
CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s20 -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; 
CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s20 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: 
vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -1671,102 +1587,90 @@ ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it ne -; CHECK-MVE-NEXT: movne r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; 
CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: 
vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -1795,102 +1699,90 @@ ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it hi -; CHECK-MVE-NEXT: movhi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: 
mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; 
CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -1919,102 +1811,90 @@ ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls 
r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it pl -; CHECK-MVE-NEXT: movpl r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp 
r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r0, #1 ; 
CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -2043,102 +1923,90 @@ ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it lt -; CHECK-MVE-NEXT: movlt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: 
vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; 
CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -2167,102 +2035,90 @@ ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it le -; CHECK-MVE-NEXT: movle r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, 
s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: 
vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -2291,102 +2147,90 @@ ; CHECK-MVE-NEXT: it vc ; 
CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it vc -; CHECK-MVE-NEXT: movvc r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; 
CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, 
s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr @@ -2416,102 +2260,90 @@ ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vmovx.f16 s22, s1 -; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vseleq.f16 s20, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s16 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 
s20, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s9 ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s22, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s20 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 ; CHECK-MVE-NEXT: vcmp.f16 s22, s20 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s20, s10 ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s3 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 -; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s7 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 +; CHECK-MVE-NEXT: vmovx.f16 s20, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s22, s15 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s20 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr Index: llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll +++ llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll @@ -811,98 +811,86 @@ ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; 
CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 
s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 
s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -937,111 +925,99 @@ ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 -; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s2, 
s4 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; 
CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1074,98 +1050,86 @@ ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; 
CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, 
fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1197,98 +1161,86 @@ ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ge -; CHECK-MVE-NEXT: movge r2, #1 -; CHECK-MVE-NEXT: cmp 
r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; 
CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1320,98 +1272,86 @@ ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: 
cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; 
CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; 
CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1443,98 +1383,86 @@ ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ls -; CHECK-MVE-NEXT: movls r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: 
cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; 
CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1569,111 +1497,99 @@ ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; 
CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: lsls r1, 
r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1706,98 +1622,86 @@ ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; 
CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ne -; CHECK-MVE-NEXT: movne r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; 
CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; 
CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1829,98 +1733,86 @@ ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it hi -; CHECK-MVE-NEXT: movhi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, 
#31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 +; 
CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1952,98 +1844,86 @@ ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it pl -; CHECK-MVE-NEXT: movpl r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 
s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; 
CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -2075,98 +1955,86 @@ ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it lt -; CHECK-MVE-NEXT: movlt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; 
CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; 
CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -2198,98 +2066,86 @@ ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-MVE-NEXT: it le -; CHECK-MVE-NEXT: movle r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 
; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -2321,98 +2177,86 @@ 
; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vc -; CHECK-MVE-NEXT: movvc r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; 
CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r0, #1 ; CHECK-MVE-NEXT: cmp r0, 
#0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -2445,98 +2289,86 @@ ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; 
CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs 
; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3368,98 +3200,86 @@ ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 
; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 ; 
CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3494,111 +3314,99 @@ ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, 
s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 -; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: 
vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: 
vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3631,98 +3439,86 @@ ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: 
lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; 
CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3754,98 +3550,86 @@ ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ge -; CHECK-MVE-NEXT: movge r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: 
vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; 
CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3877,98 +3661,86 @@ ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; 
CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; 
CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4000,98 +3772,86 @@ ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-MVE-NEXT: it ls -; CHECK-MVE-NEXT: movls r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 
; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4126,111 +3886,99 @@ 
; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 -; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r1, #0 ; 
CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; 
CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4263,98 +4011,86 @@ ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ne -; CHECK-MVE-NEXT: movne r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, 
#0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; 
CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4386,98 +4122,86 @@ ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it hi -; CHECK-MVE-NEXT: movhi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; 
CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s2 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: 
vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s2 -; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 -; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4509,98 +4233,86 @@ ; CHECK-MVE-NEXT: it pl ; 
CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it pl -; CHECK-MVE-NEXT: movpl r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, 
#0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset 
r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4632,98 +4344,86 @@ ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it lt -; CHECK-MVE-NEXT: movlt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; 
CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt 
r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4755,98 +4455,86 @@ ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it le -; CHECK-MVE-NEXT: movle r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls 
r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it le +; 
CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4878,98 +4566,86 @@ ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vc -; CHECK-MVE-NEXT: movvc r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: 
vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; 
CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -5002,98 +4678,86 @@ ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s4, s0 ; 
CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, 
s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s4, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; 
CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -5128,98 +4792,86 @@ ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s13 -; CHECK-MVE-NEXT: vmov.16 q4[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q4[1], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s13 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s13, s9 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s16, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s4 
; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vmov.16 q4[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s6, s14, s10 -; CHECK-MVE-NEXT: vmov r1, s6 +; CHECK-MVE-NEXT: vseleq.f16 s17, s13, s9 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s17, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmov.16 q4[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmov.16 q4[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s15 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s14, s10 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s18, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s3 +; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s4 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s15 -; CHECK-MVE-NEXT: 
vseleq.f16 s6, s15, s11 +; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q4[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q4[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s19, s15, s11 +; CHECK-MVE-NEXT: vins.f16 s19, s6 ; CHECK-MVE-NEXT: vmov q0, q4 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr Index: llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll +++ llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll @@ -769,98 +769,86 @@ ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; 
CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: 
vmovx.f16 s18, s11 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -892,111 +880,99 @@ ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs 
APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 -; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs 
r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 
q3[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1026,98 +1002,86 @@ ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; 
CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; 
CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1146,98 +1110,86 @@ ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ge -; CHECK-MVE-NEXT: movge r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; 
CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 
q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1266,98 +1218,86 @@ ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; 
CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: 
vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1386,98 +1326,86 @@ ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, 
#0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ls -; CHECK-MVE-NEXT: movls r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: 
mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls -; CHECK-MVE-NEXT: movls r0, #1 -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: movls r1, 
#1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1509,111 +1437,99 @@ ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 -; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; 
CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; 
CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1643,98 +1559,86 @@ ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-MVE-NEXT: it ne -; CHECK-MVE-NEXT: movne r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; 
CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, 
s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1763,98 +1667,86 @@ ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it hi -; CHECK-MVE-NEXT: movhi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; 
CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; 
CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -1883,98 +1775,86 @@ ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it pl -; CHECK-MVE-NEXT: movpl r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, 
#0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; 
CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -2003,98 +1883,86 @@ ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it lt -; CHECK-MVE-NEXT: movlt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: 
vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne 
-; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -2123,98 +1991,86 @@ ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 
-; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it le -; CHECK-MVE-NEXT: movle r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, 
s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], 
r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -2243,98 +2099,86 @@ ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vc -; CHECK-MVE-NEXT: movvc r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s1 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, s16 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; 
CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, s16 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s3 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp 
r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s3 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -2364,98 +2208,86 @@ ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s1 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, s16 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, s16 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s3 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; 
CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3242,98 +3074,86 @@ ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 
s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; 
CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3365,111 +3185,99 @@ ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: 
vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 -; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 -; 
CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; 
CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3499,98 +3307,86 @@ ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it mi -; CHECK-MVE-NEXT: movmi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: 
vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it mi +; 
CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3619,98 +3415,86 @@ ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ls -; CHECK-MVE-NEXT: movls r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls ; 
CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3739,98 +3523,86 @@ ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; 
CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it gt -; CHECK-MVE-NEXT: movgt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; 
CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; 
CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3859,98 +3631,86 @@ ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ge -; CHECK-MVE-NEXT: movge r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, 
#1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; 
CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -3982,111 +3742,99 @@ ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it eq -; CHECK-MVE-NEXT: moveq r2, #1 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 -; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; 
CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 -; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: 
vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r1, #1 -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it eq ; CHECK-MVE-NEXT: moveq r0, #1 -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4116,98 +3864,86 @@ ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: 
cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it ne -; CHECK-MVE-NEXT: movne r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; 
CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 -; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ne ; CHECK-MVE-NEXT: movne r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: 
cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4236,98 +3972,86 @@ ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it lt -; CHECK-MVE-NEXT: movlt r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr 
-; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4356,98 +4080,86 @@ ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it le -; CHECK-MVE-NEXT: movle r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it le ; 
CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; 
CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4476,98 +4188,86 @@ ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it hi -; CHECK-MVE-NEXT: movhi r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: 
vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; 
CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4596,98 +4296,86 @@ ; CHECK-MVE-NEXT: it pl ; 
CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it pl -; CHECK-MVE-NEXT: movpl r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 
-; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, #0 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; 
CHECK-MVE-NEXT: movpl r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4716,98 +4404,86 @@ ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vc -; CHECK-MVE-NEXT: movvc r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s1 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, s16 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 
+; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, s16 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s3 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; 
CHECK-MVE-NEXT: vcmp.f16 s16, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr @@ -4837,98 +4513,86 @@ ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmp.f16 s0, s0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #1 -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmov r1, s12 -; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmp.f16 s1, s1 -; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmovx.f16 s18, s9 -; CHECK-MVE-NEXT: vmov.16 q3[0], r2 -; CHECK-MVE-NEXT: vmovx.f16 s0, s3 -; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: vseleq.f16 s16, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; 
CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 ; CHECK-MVE-NEXT: vcmp.f16 s16, s16 -; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, s1 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vmovx.f16 s18, s10 -; CHECK-MVE-NEXT: vmov.16 q3[3], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 -; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vseleq.f16 s13, s9, s5 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s13, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 ; CHECK-MVE-NEXT: vcmp.f16 s16, s16 -; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmp.f16 s3, s3 ; CHECK-MVE-NEXT: vseleq.f16 s16, 
s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmp.f16 s0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmovx.f16 s18, s11 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s14, s10, s6 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vins.f16 s14, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s3 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, s3 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vmovx.f16 s2, s11 -; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov.16 q3[6], r1 -; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vseleq.f16 s15, s11, s7 +; CHECK-MVE-NEXT: vins.f16 s15, s16 ; CHECK-MVE-NEXT: vmov q0, q3 ; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr Index: llvm/test/CodeGen/Thumb2/mve-vcvt.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vcvt.ll +++ llvm/test/CodeGen/Thumb2/mve-vcvt.ll @@ -91,47 +91,35 @@ define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) { ; CHECK-MVE-LABEL: foo_half_int16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmov.s16 r0, q0[0] -; CHECK-MVE-NEXT: vmov.s16 r1, q0[1] -; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: 
vcvt.f16.s32 s4, s4 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vmov s4, r1 -; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmov.16 q1[0], r0 -; CHECK-MVE-NEXT: vmov.s16 r0, q0[2] -; CHECK-MVE-NEXT: vmov.16 q1[1], r1 -; CHECK-MVE-NEXT: vmov s8, r0 -; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[2], r0 -; CHECK-MVE-NEXT: vmov.s16 r0, q0[3] -; CHECK-MVE-NEXT: vmov s8, r0 -; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov.s16 r0, q0[4] +; CHECK-MVE-NEXT: vmov q1, q0 +; CHECK-MVE-NEXT: vmov.s16 r0, q0[1] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q1[0] +; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s0 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vcvt.f16.s32 s0, s0 +; CHECK-MVE-NEXT: vmov.s16 r0, q1[3] +; CHECK-MVE-NEXT: vins.f16 s0, s8 ; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q1[2] ; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 -; CHECK-MVE-NEXT: vmov.s16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s10, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q1[5] +; CHECK-MVE-NEXT: vcvt.f16.s32 s1, s10 +; CHECK-MVE-NEXT: vins.f16 s1, s8 ; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q1[4] ; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov.s16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s10, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q1[7] +; CHECK-MVE-NEXT: vcvt.f16.s32 s2, s10 +; CHECK-MVE-NEXT: vins.f16 s2, s8 ; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q1[6] ; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; CHECK-MVE-NEXT: vmov.s16 r0, q0[7] -; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: vcvt.f16.s32 s0, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[7], r0 
-; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vcvt.f16.s32 s3, s4 +; CHECK-MVE-NEXT: vins.f16 s3, s8 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: foo_half_int16: @@ -146,47 +134,35 @@ define arm_aapcs_vfpcc <8 x half> @foo_half_uint16(<8 x i16> %src) { ; CHECK-MVE-LABEL: foo_half_uint16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] -; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] -; CHECK-MVE-NEXT: vmov s4, r0 -; CHECK-MVE-NEXT: vcvt.f16.u32 s4, s4 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vmov s4, r1 -; CHECK-MVE-NEXT: vcvt.f16.u32 s4, s4 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmov.16 q1[0], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] -; CHECK-MVE-NEXT: vmov.16 q1[1], r1 -; CHECK-MVE-NEXT: vmov s8, r0 -; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[2], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] -; CHECK-MVE-NEXT: vmov s8, r0 -; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov q1, q0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[1] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[0] +; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s0 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[3] +; CHECK-MVE-NEXT: vins.f16 s0, s8 ; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[2] ; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s10, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[5] +; CHECK-MVE-NEXT: vcvt.f16.u32 s1, s10 +; CHECK-MVE-NEXT: vins.f16 s1, s8 ; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[4] ; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; 
CHECK-MVE-NEXT: vmov s10, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vcvt.f16.u32 s2, s10 +; CHECK-MVE-NEXT: vins.f16 s2, s8 ; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[6] ; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] -; CHECK-MVE-NEXT: vmov s0, r0 -; CHECK-MVE-NEXT: vcvt.f16.u32 s0, s0 -; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vmov.16 q1[7], r0 -; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vcvt.f16.u32 s3, s4 +; CHECK-MVE-NEXT: vins.f16 s3, s8 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: foo_half_uint16: Index: llvm/test/CodeGen/Thumb2/mve-vld2.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vld2.ll +++ llvm/test/CodeGen/Thumb2/mve-vld2.ll @@ -205,41 +205,33 @@ define void @vld2_v8i16_align1(<16 x i16> *%src, <8 x i16> *%dst) { ; CHECK-LABEL: vld2_v8i16_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q1, [r0] -; CHECK-NEXT: vldrb.u8 q2, [r0, #16] -; CHECK-NEXT: vmov.u16 r2, q1[1] +; CHECK-NEXT: vldrb.u8 q2, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0, #16] +; CHECK-NEXT: vmov.f64 d2, d4 ; CHECK-NEXT: vmov.u16 r0, q2[1] -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[3] -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[5] -; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.16 q0[3], r2 -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[3] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u16 r0, q2[5] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u16 r0, q2[7] -; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] ; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vmov.u16 r0, q2[3] ; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.u16 r0, q2[5] ; CHECK-NEXT: 
vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vmov.u16 r0, q2[7] +; CHECK-NEXT: vins.f16 s4, s9 ; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] +; CHECK-NEXT: vmov.f32 s5, s10 +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vins.f16 s5, s11 ; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] +; CHECK-NEXT: vmov.f32 s6, s0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vins.f16 s6, s1 ; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u16 r0, q2[4] +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.f32 s7, s2 ; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u16 r0, q2[6] +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vins.f16 s7, s3 ; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vadd.i16 q0, q3, q0 +; CHECK-NEXT: vadd.i16 q0, q1, q3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -559,16 +551,10 @@ ; CHECK-NEXT: vmov.32 q0[0], r2 ; CHECK-NEXT: vmov.32 q0[1], r0 ; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.16 q1[0], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vadd.f16 q0, q0, q1 +; CHECK-NEXT: vmovx.f16 s8, s0 +; CHECK-NEXT: vins.f16 s8, s4 +; CHECK-NEXT: vins.f16 s0, s1 +; CHECK-NEXT: vadd.f16 q0, q0, q2 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: bx lr @@ -585,27 +571,16 @@ ; CHECK-LABEL: vld2_v4f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s8, s0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q1[0], r2 -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vins.f16 s0, s1 ; CHECK-NEXT: vmovx.f16 s8, s1 -; CHECK-NEXT: vmovx.f16 s12, s2 -; CHECK-NEXT: vmov r2, s8 -; 
CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov.16 q2[1], r2 -; CHECK-NEXT: vmovx.f16 s12, s3 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vadd.f16 q0, q1, q2 +; CHECK-NEXT: vins.f16 s4, s8 +; CHECK-NEXT: vmovx.f16 s8, s3 +; CHECK-NEXT: vmovx.f16 s5, s2 +; CHECK-NEXT: vmov.f32 s1, s2 +; CHECK-NEXT: vins.f16 s5, s8 +; CHECK-NEXT: vins.f16 s1, s3 +; CHECK-NEXT: vadd.f16 q0, q0, q1 ; CHECK-NEXT: vmov r2, s1 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: strd r0, r2, [r1] @@ -662,49 +637,29 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vldrb.u8 q2, [r0] +; CHECK-NEXT: vldrb.u8 q3, [r0] ; CHECK-NEXT: vldrb.u8 q1, [r0, #16] -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmovx.f16 s12, s8 -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmov r3, s9 -; CHECK-NEXT: vmov.16 q0[1], r3 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vmov r2, s11 -; CHECK-NEXT: vmov.16 q0[3], r2 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov r0, s5 -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov r0, s6 -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s9 -; CHECK-NEXT: vmovx.f16 s16, s10 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.16 q3[1], r2 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmovx.f16 s8, s11 -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s4 -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s5 -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s6 -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s7 -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q3[7], r0 -; 
CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vadd.f16 q0, q0, q3 +; CHECK-NEXT: vmov.f64 d0, d6 +; CHECK-NEXT: vmovx.f16 s16, s13 +; CHECK-NEXT: vmovx.f16 s8, s12 +; CHECK-NEXT: vins.f16 s8, s16 +; CHECK-NEXT: vmovx.f16 s16, s15 +; CHECK-NEXT: vmovx.f16 s9, s14 +; CHECK-NEXT: vmovx.f16 s12, s5 +; CHECK-NEXT: vins.f16 s0, s13 +; CHECK-NEXT: vins.f16 s9, s16 +; CHECK-NEXT: vmov.f32 s1, s14 +; CHECK-NEXT: vmovx.f16 s10, s4 +; CHECK-NEXT: vins.f16 s1, s15 +; CHECK-NEXT: vins.f16 s10, s12 +; CHECK-NEXT: vmov.f32 s2, s4 +; CHECK-NEXT: vmovx.f16 s12, s7 +; CHECK-NEXT: vmovx.f16 s11, s6 +; CHECK-NEXT: vins.f16 s2, s5 +; CHECK-NEXT: vins.f16 s6, s7 +; CHECK-NEXT: vins.f16 s11, s12 +; CHECK-NEXT: vmov.f32 s3, s6 +; CHECK-NEXT: vadd.f16 q0, q0, q2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vpop {d8} ; CHECK-NEXT: bx lr Index: llvm/test/CodeGen/Thumb2/mve-vld3.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vld3.ll +++ llvm/test/CodeGen/Thumb2/mve-vld3.ll @@ -330,73 +330,67 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vldrw.u32 q0, [r0, #32] ; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.u16 r0, q2[4] -; CHECK-NEXT: vmov.16 q4[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.16 q4[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[0] +; CHECK-NEXT: vmov.u16 r0, q2[2] +; CHECK-NEXT: vmov.16 q4[0], r0 +; CHECK-NEXT: vmov.u16 r0, q2[5] +; CHECK-NEXT: vmov.16 q4[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.16 q4[2], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.16 q4[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[4] ; CHECK-NEXT: vmov.16 q5[6], r0 -; CHECK-NEXT: vmov.u16 r0, q2[7] +; CHECK-NEXT: vmov.u16 r0, q0[7] ; CHECK-NEXT: vmov.16 q5[7], r0 -; 
CHECK-NEXT: vmov.16 q4[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[3] -; CHECK-NEXT: vmov.f32 s22, s8 -; CHECK-NEXT: vmov.16 q4[3], r2 -; CHECK-NEXT: vmov q3, q5 ; CHECK-NEXT: vmov.f32 s18, s7 -; CHECK-NEXT: vmovnb.i32 q3, q4 +; CHECK-NEXT: vmov.f32 s22, s0 +; CHECK-NEXT: vmov q3, q5 ; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vmovnb.i32 q3, q4 ; CHECK-NEXT: vmov r0, s14 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r0 ; CHECK-NEXT: vmov r0, s23 ; CHECK-NEXT: vmov r2, s17 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.u16 r0, q2[0] ; CHECK-NEXT: vmov.16 q4[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov.u16 r0, q2[3] ; CHECK-NEXT: vmov.16 q4[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.u16 r0, q2[6] ; CHECK-NEXT: vmov.16 q4[2], r0 ; CHECK-NEXT: vmov.u16 r0, q1[1] ; CHECK-NEXT: vmov.16 q4[3], r0 ; CHECK-NEXT: vmov.u16 r0, q1[4] ; CHECK-NEXT: vmov.16 q4[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] +; CHECK-NEXT: vmov.u16 r0, q0[2] ; CHECK-NEXT: vmov.16 q5[6], r0 -; CHECK-NEXT: vmov.u16 r0, q2[5] +; CHECK-NEXT: vmov.u16 r0, q0[5] ; CHECK-NEXT: vmov.16 q5[7], r0 ; CHECK-NEXT: vmov.u16 r0, q1[7] ; CHECK-NEXT: vmov.16 q4[5], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.f32 s19, s23 -; CHECK-NEXT: vmov.16 q5[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.16 q5[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.16 q5[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.16 q5[3], r0 ; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.f32 s19, s23 +; CHECK-NEXT: vmovx.f16 s20, s8 +; CHECK-NEXT: vins.f16 s20, s10 +; CHECK-NEXT: vmovx.f16 s21, s11 +; CHECK-NEXT: vins.f16 s21, s5 ; CHECK-NEXT: vmov.16 q5[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u16 r0, q2[3] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u16 r0, q2[6] -; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.16 q1[5], r0 ; 
CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vmovnb.i32 q1, q5 -; CHECK-NEXT: vmov r0, s6 -; CHECK-NEXT: vmov q1[2], q1[0], r2, r0 -; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmovx.f16 s7, s1 +; CHECK-NEXT: vins.f16 s7, s3 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vmovnb.i32 q0, q5 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov q0[2], q0[0], r2, r0 +; CHECK-NEXT: vmov r0, s7 ; CHECK-NEXT: vmov r2, s21 -; CHECK-NEXT: vmov q1[3], q1[1], r2, r0 -; CHECK-NEXT: vadd.i16 q0, q4, q1 +; CHECK-NEXT: vmov q0[3], q0[1], r2, r0 +; CHECK-NEXT: vadd.i16 q0, q4, q0 ; CHECK-NEXT: vadd.i16 q0, q0, q3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vpop {d8, d9, d10, d11} @@ -417,144 +411,132 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vldrw.u32 q0, [r0, #48] -; CHECK-NEXT: vldrw.u32 q1, [r0, #64] -; CHECK-NEXT: vldrw.u32 q2, [r0, #80] -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.16 q4[0], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.16 q4[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[0] -; CHECK-NEXT: vmov.16 q4[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[3] -; CHECK-NEXT: vmov.16 q4[3], r2 +; CHECK-NEXT: vldrw.u32 q3, [r0, #48] +; CHECK-NEXT: vldrw.u32 q2, [r0, #64] +; CHECK-NEXT: vldrw.u32 q4, [r0, #80] +; CHECK-NEXT: vldrw.u32 q0, [r0, #16] +; CHECK-NEXT: vmov.u16 r2, q3[0] +; CHECK-NEXT: vmov.16 q1[0], r2 +; CHECK-NEXT: vmov.u16 r2, q3[3] +; CHECK-NEXT: vmov.16 q1[1], r2 +; CHECK-NEXT: vmov.u16 r2, q3[6] +; CHECK-NEXT: vmov.16 q1[2], r2 +; CHECK-NEXT: vmov.u16 r2, q2[1] +; CHECK-NEXT: vmov.16 q1[3], r2 ; CHECK-NEXT: vmov.u16 r2, q2[4] +; CHECK-NEXT: vmov.16 q1[4], r2 +; CHECK-NEXT: vmov.u16 r2, q4[2] ; CHECK-NEXT: vmov.16 q5[6], r2 -; CHECK-NEXT: vmov.u16 r2, q2[7] +; CHECK-NEXT: vmov.u16 r2, q4[5] ; CHECK-NEXT: vmov.16 q5[7], r2 -; CHECK-NEXT: vmov.f32 s18, s7 -; CHECK-NEXT: vmov.f32 s22, s8 -; CHECK-NEXT: vmov q3, q5 +; CHECK-NEXT: 
vmov.u16 r2, q2[7] +; CHECK-NEXT: vmov.16 q1[5], r2 +; CHECK-NEXT: vmov.u16 r2, q4[4] +; CHECK-NEXT: vmov.16 q6[6], r2 +; CHECK-NEXT: vmov.u16 r2, q3[2] +; CHECK-NEXT: vmov.f32 s7, s23 +; CHECK-NEXT: vmov.16 q5[0], r2 +; CHECK-NEXT: vmov.u16 r2, q3[5] +; CHECK-NEXT: vmov.16 q5[1], r2 +; CHECK-NEXT: vmov.u16 r2, q2[0] +; CHECK-NEXT: vmov.16 q5[2], r2 +; CHECK-NEXT: vmov.u16 r2, q2[3] +; CHECK-NEXT: vmov.16 q5[3], r2 +; CHECK-NEXT: vmov.u16 r2, q4[7] +; CHECK-NEXT: vmov.16 q6[7], r2 +; CHECK-NEXT: vmov.f32 s22, s11 +; CHECK-NEXT: vmov.f32 s26, s16 +; CHECK-NEXT: vmov q7, q6 +; CHECK-NEXT: vmov r3, s20 +; CHECK-NEXT: vmovnb.i32 q7, q5 +; CHECK-NEXT: vmov r12, s27 +; CHECK-NEXT: vmov r2, s30 +; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 +; CHECK-NEXT: vmov r2, s21 +; CHECK-NEXT: vmov q6[3], q6[1], r2, r12 +; CHECK-NEXT: vmov.u16 r2, q4[0] +; CHECK-NEXT: vmov.16 q5[5], r2 +; CHECK-NEXT: vmov.u16 r2, q2[5] +; CHECK-NEXT: vmovx.f16 s23, s17 +; CHECK-NEXT: vins.f16 s23, s19 +; CHECK-NEXT: vmovx.f16 s16, s12 +; CHECK-NEXT: vins.f16 s16, s14 +; CHECK-NEXT: vmovx.f16 s17, s15 +; CHECK-NEXT: vins.f16 s17, s9 +; CHECK-NEXT: vmov q2, q5 +; CHECK-NEXT: vmov.16 q4[4], r2 +; CHECK-NEXT: vmovnb.i32 q2, q4 ; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmovnb.i32 q3, q4 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmov q3[2], q3[0], r3, r2 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 ; CHECK-NEXT: vmov r2, s23 ; CHECK-NEXT: vmov r3, s17 -; CHECK-NEXT: vmov q3[3], q3[1], r3, r2 +; CHECK-NEXT: vldrw.u32 q5, [r0, #32] +; CHECK-NEXT: vmov q2[3], q2[1], r3, r2 +; CHECK-NEXT: vadd.i16 q1, q1, q2 +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vmov.u16 r0, q5[4] +; CHECK-NEXT: vadd.i16 q1, q1, q6 +; CHECK-NEXT: vmov.u16 r2, q2[2] +; CHECK-NEXT: vmov.16 q6[6], r0 +; CHECK-NEXT: vmov.16 q3[0], r2 +; CHECK-NEXT: vmov.u16 r2, q2[5] +; CHECK-NEXT: vmov.16 q3[1], r2 ; CHECK-NEXT: vmov.u16 r2, q0[0] -; CHECK-NEXT: vmov.16 q4[0], r2 +; CHECK-NEXT: vmov.u16 r0, q5[7] +; CHECK-NEXT: 
vmov.16 q3[2], r2 +; CHECK-NEXT: vmov.16 q6[7], r0 ; CHECK-NEXT: vmov.u16 r2, q0[3] +; CHECK-NEXT: vmov.16 q3[3], r2 +; CHECK-NEXT: vmov.f32 s26, s20 +; CHECK-NEXT: vmov.f32 s14, s3 +; CHECK-NEXT: vmov q4, q6 +; CHECK-NEXT: vmovnb.i32 q4, q3 +; CHECK-NEXT: vmov.u16 r2, q2[0] +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov.16 q4[0], r2 +; CHECK-NEXT: vmov.u16 r2, q2[3] +; CHECK-NEXT: vmov r3, s27 ; CHECK-NEXT: vmov.16 q4[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[6] +; CHECK-NEXT: vmov.u16 r2, q2[6] ; CHECK-NEXT: vmov.16 q4[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[1] -; CHECK-NEXT: vmov.16 q4[3], r2 -; CHECK-NEXT: vmov.u16 r2, q1[4] -; CHECK-NEXT: vmov.16 q4[4], r2 -; CHECK-NEXT: vmov.u16 r2, q2[2] -; CHECK-NEXT: vmov.16 q5[6], r2 -; CHECK-NEXT: vmov.u16 r2, q2[5] -; CHECK-NEXT: vmov.16 q5[7], r2 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.16 q4[5], r2 ; CHECK-NEXT: vmov.u16 r2, q0[1] -; CHECK-NEXT: vmov.f32 s19, s23 -; CHECK-NEXT: vmov.16 q5[0], r2 +; CHECK-NEXT: vmov.16 q4[3], r2 ; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: vmov.16 q5[1], r2 +; CHECK-NEXT: vmov.16 q4[4], r2 +; CHECK-NEXT: vmov.u16 r2, q5[2] +; CHECK-NEXT: vmov.16 q7[6], r2 +; CHECK-NEXT: vmov.u16 r2, q5[5] +; CHECK-NEXT: vmov.16 q7[7], r2 ; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.16 q5[2], r2 -; CHECK-NEXT: vmov.u16 r2, q1[2] -; CHECK-NEXT: vmov.16 q5[3], r2 -; CHECK-NEXT: vmov.u16 r2, q1[5] -; CHECK-NEXT: vmov.16 q5[4], r2 -; CHECK-NEXT: vmov.u16 r2, q2[0] -; CHECK-NEXT: vmov.16 q0[5], r2 -; CHECK-NEXT: vmov.u16 r2, q2[3] -; CHECK-NEXT: vmov.16 q0[6], r2 -; CHECK-NEXT: vmov.u16 r2, q2[6] -; CHECK-NEXT: vmov.16 q0[7], r2 -; CHECK-NEXT: vmov r3, s20 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vldrw.u32 q2, [r0] -; CHECK-NEXT: vmovnb.i32 q1, q5 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 -; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: vmov r3, s21 -; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 -; CHECK-NEXT: vmov.u16 r2, q2[0] -; CHECK-NEXT: vadd.i16 q0, q4, q1 -; 
CHECK-NEXT: vmov.16 q1[0], r2 -; CHECK-NEXT: vadd.i16 q0, q0, q3 -; CHECK-NEXT: vmov.u16 r2, q2[3] -; CHECK-NEXT: vldrw.u32 q3, [r0, #16] -; CHECK-NEXT: vldrw.u32 q4, [r0, #32] -; CHECK-NEXT: vmov.16 q1[1], r2 -; CHECK-NEXT: vmov.u16 r2, q2[6] -; CHECK-NEXT: vmov.16 q1[2], r2 -; CHECK-NEXT: vmov.u16 r2, q3[1] -; CHECK-NEXT: vmov.u16 r0, q4[2] -; CHECK-NEXT: vmov.16 q1[3], r2 -; CHECK-NEXT: vmov.u16 r2, q3[4] -; CHECK-NEXT: vmov.16 q5[6], r0 -; CHECK-NEXT: vmov.u16 r0, q4[5] -; CHECK-NEXT: vmov.16 q1[4], r2 -; CHECK-NEXT: vmov.16 q5[7], r0 -; CHECK-NEXT: vmov.u16 r0, q3[7] -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] -; CHECK-NEXT: vmov.f32 s7, s23 -; CHECK-NEXT: vmov.16 q5[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[5] -; CHECK-NEXT: vstrw.32 q0, [r1, #16] -; CHECK-NEXT: vmov.16 q5[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[0] -; CHECK-NEXT: vmov.16 q5[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[3] -; CHECK-NEXT: vmov.16 q5[3], r0 -; CHECK-NEXT: vmov.u16 r0, q4[4] -; CHECK-NEXT: vmov.16 q7[6], r0 -; CHECK-NEXT: vmov.u16 r0, q4[7] -; CHECK-NEXT: vmov.16 q7[7], r0 -; CHECK-NEXT: vmov.f32 s22, s15 -; CHECK-NEXT: vmov.f32 s30, s16 -; CHECK-NEXT: vmov q6, q7 -; CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmovnb.i32 q6, q5 -; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: vmov.16 q4[5], r2 +; CHECK-NEXT: vmov r2, s12 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r0 -; CHECK-NEXT: vmov r0, s31 +; CHECK-NEXT: vmov r0, s13 +; CHECK-NEXT: vmov q6[3], q6[1], r0, r3 +; CHECK-NEXT: vmov.u16 r0, q5[0] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmovx.f16 s15, s21 +; CHECK-NEXT: vmov.f32 s19, s31 +; CHECK-NEXT: vins.f16 s15, s23 +; CHECK-NEXT: vmovx.f16 s20, s8 +; CHECK-NEXT: vins.f16 s20, s10 +; CHECK-NEXT: vstrw.32 q1, [r1, #16] +; CHECK-NEXT: vmovx.f16 s21, s11 +; CHECK-NEXT: vins.f16 s21, s1 +; CHECK-NEXT: vmov q0, q3 +; CHECK-NEXT: vmov.16 q5[4], r0 +; CHECK-NEXT: vmovnb.i32 q0, q5 +; CHECK-NEXT: vmov r2, s20 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: 
vmov q0[2], q0[0], r2, r0 +; CHECK-NEXT: vmov r0, s15 ; CHECK-NEXT: vmov r2, s21 -; CHECK-NEXT: vmov q6[3], q6[1], r2, r0 -; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: vmov.16 q5[5], r0 -; CHECK-NEXT: vmov.u16 r0, q4[3] -; CHECK-NEXT: vmov.16 q5[6], r0 -; CHECK-NEXT: vmov.u16 r0, q2[1] -; CHECK-NEXT: vmov.16 q7[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[4] -; CHECK-NEXT: vmov.16 q7[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[7] -; CHECK-NEXT: vmov.16 q7[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[2] -; CHECK-NEXT: vmov.16 q7[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[5] -; CHECK-NEXT: vmov.16 q7[4], r0 -; CHECK-NEXT: vmov.u16 r0, q4[6] -; CHECK-NEXT: vmov.16 q5[7], r0 -; CHECK-NEXT: vmov r2, s28 -; CHECK-NEXT: vmov q2, q5 -; CHECK-NEXT: vmovnb.i32 q2, q7 -; CHECK-NEXT: vmov r0, s10 -; CHECK-NEXT: vmov q2[2], q2[0], r2, r0 -; CHECK-NEXT: vmov r0, s23 -; CHECK-NEXT: vmov r2, s29 -; CHECK-NEXT: vmov q2[3], q2[1], r2, r0 -; CHECK-NEXT: vadd.i16 q1, q1, q2 -; CHECK-NEXT: vadd.i16 q1, q1, q6 -; CHECK-NEXT: vstrw.32 q1, [r1] +; CHECK-NEXT: vmov q0[3], q0[1], r2, r0 +; CHECK-NEXT: vadd.i16 q0, q4, q0 +; CHECK-NEXT: vadd.i16 q0, q0, q6 +; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr entry: @@ -653,56 +635,54 @@ define void @vld3_v8i8(<24 x i8> *%src, <8 x i8> *%dst) { ; CHECK-LABEL: vld3_v8i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrb.u16 q1, [r0, #16] -; CHECK-NEXT: vmov.u8 r2, q0[0] -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmov.u8 r2, q0[3] -; CHECK-NEXT: vmov.16 q2[1], r2 -; CHECK-NEXT: vmov.u8 r2, q0[6] -; CHECK-NEXT: vmov.16 q2[2], r2 -; CHECK-NEXT: vmov.u8 r2, q0[9] -; CHECK-NEXT: vmov.16 q2[3], r2 -; CHECK-NEXT: vmov.u8 r2, q0[12] -; CHECK-NEXT: vmov.16 q2[4], r2 -; CHECK-NEXT: vmov.u8 r2, q0[15] -; CHECK-NEXT: vmov.16 q2[5], r2 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[1] +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: 
vldrb.u16 q0, [r0, #16] +; CHECK-NEXT: vmov.u8 r0, q1[1] +; CHECK-NEXT: vmov.16 q2[0], r0 +; CHECK-NEXT: vmov.u8 r0, q1[4] +; CHECK-NEXT: vmov.16 q2[1], r0 +; CHECK-NEXT: vmov.u8 r0, q1[7] +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov.u8 r0, q1[10] +; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.u8 r0, q1[13] +; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vmov.u8 r0, q1[0] ; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[4] +; CHECK-NEXT: vmov.u8 r0, q1[3] ; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[7] +; CHECK-NEXT: vmov.u8 r0, q1[6] ; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: vmov.u8 r0, q1[9] ; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[13] +; CHECK-NEXT: vmov.u8 r0, q1[12] ; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vmov.u8 r0, q1[15] ; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmovx.f16 s11, s1 ; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vins.f16 s11, s3 ; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[2] -; CHECK-NEXT: vadd.i16 q2, q2, q3 +; CHECK-NEXT: vmov.u8 r0, q1[2] +; CHECK-NEXT: vadd.i16 q2, q3, q2 ; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[5] +; CHECK-NEXT: vmov.u8 r0, q1[5] ; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[8] +; CHECK-NEXT: vmov.u8 r0, q1[8] ; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[11] +; CHECK-NEXT: vmov.u8 r0, q1[11] ; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[14] +; CHECK-NEXT: vmov.u8 r0, q1[14] ; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: 
vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.u16 r0, q0[4] ; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov.u16 r0, q0[7] ; CHECK-NEXT: vmov.16 q3[7], r0 ; CHECK-NEXT: vadd.i16 q0, q2, q3 ; CHECK-NEXT: vstrb.16 q0, [r1] @@ -1229,21 +1209,13 @@ ; CHECK-NEXT: vmov.32 q0[1], r3 ; CHECK-NEXT: vmov.32 q0[2], r0 ; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmov r0, s4 ; CHECK-NEXT: vmovx.f16 s8, s1 -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[1], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vadd.f16 q1, q2, q1 +; CHECK-NEXT: vins.f16 s4, s2 +; CHECK-NEXT: vins.f16 s0, s8 ; CHECK-NEXT: vmovx.f16 s8, s2 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vadd.f16 q1, q0, q1 +; CHECK-NEXT: vins.f16 s1, s8 +; CHECK-NEXT: vmov.f32 s0, s1 ; CHECK-NEXT: vadd.f16 q0, q1, q0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: str r0, [r1] @@ -1262,48 +1234,35 @@ define void @vld3_v4f16(<12 x half> *%src, <4 x half> *%dst) { ; CHECK-LABEL: vld3_v4f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8} -; CHECK-NEXT: vpush {d8} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmovx.f16 s4, s3 -; CHECK-NEXT: vmov.16 q2[0], r3 -; CHECK-NEXT: vmovx.f16 s12, s1 -; CHECK-NEXT: vmov.16 q2[1], r2 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.16 q2[2], r2 ; CHECK-NEXT: ldrd r2, r0, [r0, #16] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.32 q1[1], r0 -; CHECK-NEXT: vmovx.f16 s0, s2 -; CHECK-NEXT: vmov r0, s5 -; CHECK-NEXT: vmovx.f16 s16, s4 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov.16 q3[0], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: 
vmov.16 q3[1], r0 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmovx.f16 s4, s5 -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vadd.f16 q2, q3, q2 -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vadd.f16 q0, q2, q0 +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vmovx.f16 s8, s1 +; CHECK-NEXT: vins.f16 s4, s2 +; CHECK-NEXT: vins.f16 s0, s8 +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: vmov.f32 s14, s3 +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmovx.f16 s5, s3 +; CHECK-NEXT: vmovx.f16 s12, s8 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vins.f16 s14, s12 +; CHECK-NEXT: vmovx.f16 s12, s2 +; CHECK-NEXT: vins.f16 s1, s12 +; CHECK-NEXT: vmovx.f16 s12, s9 +; CHECK-NEXT: vins.f16 s5, s9 +; CHECK-NEXT: vmov.f32 s17, s14 +; CHECK-NEXT: vmov.f32 s0, s1 +; CHECK-NEXT: vins.f16 s8, s12 +; CHECK-NEXT: vadd.f16 q1, q4, q1 +; CHECK-NEXT: vmov.f32 s1, s8 +; CHECK-NEXT: vadd.f16 q0, q1, q0 ; CHECK-NEXT: vmov r2, s1 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: strd r0, r2, [r1] -; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %l1 = load <12 x half>, <12 x half>* %src, align 4 @@ -1321,90 +1280,60 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} -; CHECK-NEXT: vldrw.u32 q2, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r0, #16] +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vldrw.u32 q3, [r0] ; CHECK-NEXT: vldrw.u32 q4, [r0, #32] -; CHECK-NEXT: vmovx.f16 s0, s10 -; CHECK-NEXT: vmov r3, s9 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q0[0], r3 -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov r2, s4 -; 
CHECK-NEXT: vmovx.f16 s12, s5 -; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmovx.f16 s12, s19 -; CHECK-NEXT: vmov.16 q0[3], r2 -; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmovx.f16 s4, s14 +; CHECK-NEXT: vmov.f32 s0, s13 +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vldrw.u32 q1, [r0, #16] +; CHECK-NEXT: vmovx.f16 s20, s19 +; CHECK-NEXT: vmovx.f16 s24, s13 +; CHECK-NEXT: vmovx.f16 s8, s5 +; CHECK-NEXT: vmov.f32 s1, s4 +; CHECK-NEXT: vins.f16 s1, s8 +; CHECK-NEXT: vmov.f32 s11, s18 +; CHECK-NEXT: vins.f16 s11, s20 ; CHECK-NEXT: vmov.f32 s2, s7 ; CHECK-NEXT: vmovx.f16 s20, s16 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov.16 q3[6], r3 -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmov r0, s20 -; CHECK-NEXT: vmovx.f16 s24, s11 -; CHECK-NEXT: vmov r5, s8 -; CHECK-NEXT: vmov.f32 s14, s16 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmov.16 q5[4], r2 -; CHECK-NEXT: vmov.16 q5[5], r0 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vmov r12, s22 -; CHECK-NEXT: vmovx.f16 s20, s17 -; CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmov.16 q5[6], r2 -; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: vmov.16 q5[7], r0 -; CHECK-NEXT: vmov lr, s23 -; CHECK-NEXT: vmovx.f16 s20, s6 -; CHECK-NEXT: vmov r0, s20 -; CHECK-NEXT: vmov.16 q5[4], r0 -; CHECK-NEXT: vmov r0, s10 -; CHECK-NEXT: vmov.16 q5[5], r2 -; CHECK-NEXT: vmov r3, s22 -; CHECK-NEXT: vmovx.f16 s20, s8 -; CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmov.16 q5[0], r2 -; CHECK-NEXT: vmov.16 q5[1], r0 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: vmov.16 q5[2], r0 -; CHECK-NEXT: vmov r0, s5 -; CHECK-NEXT: vmov.16 q5[3], r0 -; CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmovx.f16 s20, s9 -; CHECK-NEXT: vmov r0, s21 -; CHECK-NEXT: vmov r4, s20 -; CHECK-NEXT: vmov.16 q5[0], r5 -; CHECK-NEXT: vmov.16 q5[1], r4 -; CHECK-NEXT: vmov r4, s11 -; CHECK-NEXT: vmovx.f16 s8, s4 -; CHECK-NEXT: vmov.16 q5[2], r4 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: vmovx.f16 s8, s18 -; CHECK-NEXT: vmov.16 q5[3], r4 -; CHECK-NEXT: vmov r4, s6 -; 
CHECK-NEXT: vmov.16 q5[4], r4 -; CHECK-NEXT: vmov r4, s17 -; CHECK-NEXT: vmovx.f16 s4, s7 -; CHECK-NEXT: vmov r5, s8 -; CHECK-NEXT: vmov.16 q2[6], r4 -; CHECK-NEXT: vmov r4, s4 -; CHECK-NEXT: vmov.16 q5[5], r4 +; CHECK-NEXT: vmov.f32 s10, s16 +; CHECK-NEXT: vins.f16 s2, s20 +; CHECK-NEXT: vmov.f64 d10, d6 +; CHECK-NEXT: vins.f16 s20, s24 +; CHECK-NEXT: vmovx.f16 s24, s4 +; CHECK-NEXT: vmov.f32 s21, s15 +; CHECK-NEXT: vins.f16 s21, s24 +; CHECK-NEXT: vmovx.f16 s24, s7 +; CHECK-NEXT: vmov.f32 s22, s6 +; CHECK-NEXT: vins.f16 s22, s24 +; CHECK-NEXT: vmovx.f16 s24, s12 +; CHECK-NEXT: vins.f16 s24, s14 +; CHECK-NEXT: vmov r12, s2 +; CHECK-NEXT: vmovx.f16 s25, s15 +; CHECK-NEXT: vmovx.f16 s15, s17 +; CHECK-NEXT: vins.f16 s15, s19 +; CHECK-NEXT: vins.f16 s25, s5 +; CHECK-NEXT: vmov r0, s15 +; CHECK-NEXT: vmovx.f16 s12, s18 +; CHECK-NEXT: vmovx.f16 s6, s6 +; CHECK-NEXT: vins.f16 s17, s12 +; CHECK-NEXT: vins.f16 s6, s16 +; CHECK-NEXT: vmov r5, s24 +; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: vmov.16 q2[7], r5 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r12 ; CHECK-NEXT: vmov r4, s1 -; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 -; CHECK-NEXT: vmov r5, s15 -; CHECK-NEXT: vmov q0[3], q0[1], r0, lr -; CHECK-NEXT: vmov.f32 s23, s11 -; CHECK-NEXT: vmov q1[3], q1[1], r4, r5 +; CHECK-NEXT: vmov lr, s25 +; CHECK-NEXT: vmov q0[2], q0[0], r5, r2 +; CHECK-NEXT: vmov r3, s11 +; CHECK-NEXT: vmov q0[3], q0[1], lr, r0 +; CHECK-NEXT: vmov.f32 s23, s17 +; CHECK-NEXT: vmov q1[3], q1[1], r4, r3 ; CHECK-NEXT: vadd.f16 q0, q5, q0 ; CHECK-NEXT: vadd.f16 q0, q0, q1 ; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %l1 = load <24 x half>, <24 x half>* %src, align 4 @@ -1422,171 +1351,111 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, 
d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vldrw.u32 q2, [r0, #80] +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vldrw.u32 q3, [r0, #48] -; CHECK-NEXT: vldrw.u32 q4, [r0, #64] -; CHECK-NEXT: vmovx.f16 s0, s11 -; CHECK-NEXT: vmovx.f16 s4, s8 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: vmovx.f16 s20, s17 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov.16 q0[6], r2 -; CHECK-NEXT: vmov r12, s4 +; CHECK-NEXT: vldrw.u32 q4, [r0, #80] ; CHECK-NEXT: vmovx.f16 s4, s14 -; CHECK-NEXT: vmov r2, s13 -; CHECK-NEXT: vmov.16 q0[7], r3 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.16 q1[0], r2 -; CHECK-NEXT: vmov.16 q1[1], r3 -; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: vmov.16 q1[2], r2 -; CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmov.16 q1[3], r2 +; CHECK-NEXT: vmov.f32 s0, s13 +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vldrw.u32 q1, [r0, #64] +; CHECK-NEXT: vmovx.f16 s20, s19 ; CHECK-NEXT: vmovx.f16 s24, s13 -; CHECK-NEXT: vmov.f32 s6, s19 -; CHECK-NEXT: vmovx.f16 s28, s16 -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vmov r4, s14 -; CHECK-NEXT: vmov r5, s4 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov.16 q5[4], r2 -; CHECK-NEXT: vmov r2, s9 -; CHECK-NEXT: vmov.16 q5[5], r12 -; CHECK-NEXT: vmov r12, s22 -; CHECK-NEXT: vmovx.f16 s20, s10 -; CHECK-NEXT: vmov r3, s20 -; CHECK-NEXT: vmov.16 q5[6], r2 -; CHECK-NEXT: vmov.16 q5[7], r3 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov r2, s24 -; CHECK-NEXT: vmov.16 q6[0], r3 -; CHECK-NEXT: vmov.16 q6[1], r2 -; CHECK-NEXT: vmov r2, s15 -; CHECK-NEXT: vmov.16 q6[2], r2 -; CHECK-NEXT: vmov r2, s28 -; CHECK-NEXT: vmov.16 q6[3], r2 -; CHECK-NEXT: vmov r2, s18 -; CHECK-NEXT: vmovx.f16 s28, s19 -; CHECK-NEXT: vmov.16 q6[4], r2 -; CHECK-NEXT: vmov r2, s28 -; CHECK-NEXT: vmovx.f16 s28, s18 -; CHECK-NEXT: vmov.16 q6[5], r2 -; CHECK-NEXT: vmov r2, s28 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vmov.16 q7[4], r2 -; CHECK-NEXT: vmovx.f16 s8, s9 -; CHECK-NEXT: vmov.16 q7[5], r3 
-; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmovx.f16 s8, s5 +; CHECK-NEXT: vmov.f32 s1, s4 +; CHECK-NEXT: vins.f16 s1, s8 +; CHECK-NEXT: vmov.f32 s11, s18 +; CHECK-NEXT: vins.f16 s11, s20 +; CHECK-NEXT: vmov.f32 s2, s7 +; CHECK-NEXT: vmovx.f16 s20, s16 +; CHECK-NEXT: vmov.f32 s10, s16 +; CHECK-NEXT: vins.f16 s2, s20 +; CHECK-NEXT: vmov.f64 d10, d6 +; CHECK-NEXT: vins.f16 s20, s24 +; CHECK-NEXT: vmovx.f16 s24, s4 +; CHECK-NEXT: vmov.f32 s21, s15 +; CHECK-NEXT: vins.f16 s21, s24 +; CHECK-NEXT: vmovx.f16 s24, s7 +; CHECK-NEXT: vmov.f32 s22, s6 +; CHECK-NEXT: vins.f16 s22, s24 +; CHECK-NEXT: vmovx.f16 s24, s12 +; CHECK-NEXT: vins.f16 s24, s14 +; CHECK-NEXT: vmov r12, s2 +; CHECK-NEXT: vmovx.f16 s25, s15 +; CHECK-NEXT: vmovx.f16 s15, s17 +; CHECK-NEXT: vins.f16 s25, s5 +; CHECK-NEXT: vmov r6, s24 +; CHECK-NEXT: vmov lr, s25 +; CHECK-NEXT: vmovx.f16 s24, s18 +; CHECK-NEXT: vmovx.f16 s6, s6 +; CHECK-NEXT: vins.f16 s17, s24 +; CHECK-NEXT: vins.f16 s6, s16 +; CHECK-NEXT: vins.f16 s15, s19 +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: vmov r5, s0 +; CHECK-NEXT: vmov q1[2], q1[0], r5, r12 +; CHECK-NEXT: vmov r5, s1 +; CHECK-NEXT: vmov r4, s15 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r3 ; CHECK-NEXT: vmov r2, s11 -; CHECK-NEXT: vmov.16 q2[6], r3 -; CHECK-NEXT: vmov.16 q2[7], r2 -; CHECK-NEXT: vmov lr, s30 -; CHECK-NEXT: vmov r6, s11 -; CHECK-NEXT: vmovx.f16 s8, s12 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vmovx.f16 s12, s15 -; CHECK-NEXT: vmov.16 q2[0], r3 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov.16 q2[1], r4 -; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: vmov.16 q2[2], r3 -; CHECK-NEXT: vmov r3, s17 -; CHECK-NEXT: vmov.16 q2[3], r3 -; CHECK-NEXT: vmov.f32 s27, s23 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vldrw.u32 q4, [r0, #32] -; CHECK-NEXT: vmov r4, s9 -; CHECK-NEXT: vmov q2[2], q2[0], r5, r12 -; CHECK-NEXT: vmov r5, s3 -; CHECK-NEXT: vmov q0[2], q0[0], r3, lr -; CHECK-NEXT: vmov q0[3], q0[1], r4, r6 -; CHECK-NEXT: vmov q2[3], q2[1], r2, r5 -; CHECK-NEXT: vadd.f16 q0, q6, q0 -; 
CHECK-NEXT: vmovx.f16 s12, s16 -; CHECK-NEXT: vadd.f16 q1, q0, q2 -; CHECK-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vmov q0[3], q0[1], lr, r4 +; CHECK-NEXT: vmov.f32 s23, s17 +; CHECK-NEXT: vldrw.u32 q4, [r0] +; CHECK-NEXT: vmov q1[3], q1[1], r5, r2 +; CHECK-NEXT: vadd.f16 q0, q5, q0 +; CHECK-NEXT: vadd.f16 q1, q0, q1 +; CHECK-NEXT: vldrw.u32 q0, [r0, #32] +; CHECK-NEXT: vldrw.u32 q2, [r0, #16] ; CHECK-NEXT: vstrw.32 q1, [r1, #16] -; CHECK-NEXT: vmovx.f16 s4, s19 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.16 q1[7], r2 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s10 -; CHECK-NEXT: vmov r2, s9 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov.16 q3[0], r2 -; CHECK-NEXT: vmov.16 q3[1], r3 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s20, s1 -; CHECK-NEXT: vmov.16 q3[2], r2 -; CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmovx.f16 s24, s9 -; CHECK-NEXT: vmov.16 q3[3], r2 -; CHECK-NEXT: vmovx.f16 s28, s0 -; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vmov.f32 s6, s16 -; CHECK-NEXT: vmov r5, s10 -; CHECK-NEXT: vmov r4, s12 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmov.16 q5[4], r2 -; CHECK-NEXT: vmov r2, s17 -; CHECK-NEXT: vmov.16 q5[5], r0 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: vmovx.f16 s20, s18 -; CHECK-NEXT: vmov r3, s20 -; CHECK-NEXT: vmov.16 q5[6], r2 -; CHECK-NEXT: vmov.16 q5[7], r3 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vmov r2, s24 -; CHECK-NEXT: vmov.16 q6[0], r3 -; CHECK-NEXT: vmov.16 q6[1], r2 -; CHECK-NEXT: vmov r2, s11 -; CHECK-NEXT: vmov.16 q6[2], r2 -; CHECK-NEXT: vmov r2, s28 -; CHECK-NEXT: vmov.16 q6[3], r2 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmovx.f16 s28, s3 -; CHECK-NEXT: vmov.16 q6[4], r2 -; CHECK-NEXT: vmov r2, s28 -; CHECK-NEXT: vmovx.f16 s28, s2 -; CHECK-NEXT: vmov.16 q6[5], r2 -; CHECK-NEXT: vmov r2, s28 -; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmovx.f16 s16, s17 -; CHECK-NEXT: vmov.16 q7[4], r2 -; CHECK-NEXT: 
vmov r6, s16 -; CHECK-NEXT: vmov.16 q7[5], r3 -; CHECK-NEXT: vmov r3, s19 -; CHECK-NEXT: vmov.16 q4[6], r6 -; CHECK-NEXT: vmov r2, s30 -; CHECK-NEXT: vmov.16 q4[7], r3 -; CHECK-NEXT: vmov.f32 s27, s23 -; CHECK-NEXT: vmov r3, s19 -; CHECK-NEXT: vmovx.f16 s16, s8 -; CHECK-NEXT: vmov r6, s16 -; CHECK-NEXT: vmovx.f16 s8, s11 -; CHECK-NEXT: vmov.16 q4[0], r6 -; CHECK-NEXT: vmov r6, s8 -; CHECK-NEXT: vmov.16 q4[1], r5 -; CHECK-NEXT: vmov.16 q4[2], r6 -; CHECK-NEXT: vmov r6, s1 -; CHECK-NEXT: vmov.16 q4[3], r6 +; CHECK-NEXT: vmovx.f16 s12, s18 +; CHECK-NEXT: vmov.f32 s4, s17 +; CHECK-NEXT: vins.f16 s4, s12 +; CHECK-NEXT: vmovx.f16 s12, s9 +; CHECK-NEXT: vmov.f32 s5, s8 +; CHECK-NEXT: vmovx.f16 s20, s3 +; CHECK-NEXT: vins.f16 s5, s12 +; CHECK-NEXT: vmov.f32 s15, s2 +; CHECK-NEXT: vins.f16 s15, s20 +; CHECK-NEXT: vmov.f32 s6, s11 +; CHECK-NEXT: vmovx.f16 s20, s0 +; CHECK-NEXT: vmovx.f16 s24, s17 +; CHECK-NEXT: vins.f16 s6, s20 +; CHECK-NEXT: vmov.f64 d10, d8 +; CHECK-NEXT: vins.f16 s20, s24 +; CHECK-NEXT: vmovx.f16 s24, s8 +; CHECK-NEXT: vmov.f32 s21, s19 +; CHECK-NEXT: vins.f16 s21, s24 +; CHECK-NEXT: vmovx.f16 s24, s11 +; CHECK-NEXT: vmov.f32 s22, s10 +; CHECK-NEXT: vins.f16 s22, s24 +; CHECK-NEXT: vmovx.f16 s24, s16 +; CHECK-NEXT: vins.f16 s24, s18 +; CHECK-NEXT: vmov.f32 s14, s0 +; CHECK-NEXT: vmovx.f16 s25, s19 +; CHECK-NEXT: vmovx.f16 s19, s1 +; CHECK-NEXT: vins.f16 s25, s9 +; CHECK-NEXT: vmov r3, s24 +; CHECK-NEXT: vmov r2, s25 +; CHECK-NEXT: vmovx.f16 s24, s2 +; CHECK-NEXT: vmovx.f16 s10, s10 +; CHECK-NEXT: vins.f16 s1, s24 +; CHECK-NEXT: vins.f16 s10, s0 +; CHECK-NEXT: vins.f16 s19, s3 +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: vmov r6, s10 +; CHECK-NEXT: vmov r4, s4 +; CHECK-NEXT: vmov.f32 s23, s1 ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 -; CHECK-NEXT: vmov r6, s16 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov q1[2], q1[0], r6, r2 -; CHECK-NEXT: vmov r5, s17 -; CHECK-NEXT: vmov q1[3], q1[1], r5, r3 -; CHECK-NEXT: vmov r4, s13 -; CHECK-NEXT: vmov q0[3], q0[1], r4, 
r0 -; CHECK-NEXT: vadd.f16 q1, q6, q1 +; CHECK-NEXT: vmov r0, s5 +; CHECK-NEXT: vmov q1[2], q1[0], r3, r6 +; CHECK-NEXT: vmov r5, s19 +; CHECK-NEXT: vmov q1[3], q1[1], r2, r5 +; CHECK-NEXT: vmov r4, s15 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r4 +; CHECK-NEXT: vadd.f16 q1, q5, q1 ; CHECK-NEXT: vadd.f16 q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %l1 = load <48 x half>, <48 x half>* %src, align 4 Index: llvm/test/CodeGen/Thumb2/mve-vld4.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vld4.ll +++ llvm/test/CodeGen/Thumb2/mve-vld4.ll @@ -390,84 +390,64 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vldrb.u8 q1, [r0] -; CHECK-NEXT: vldrb.u8 q0, [r0, #16] -; CHECK-NEXT: vldrb.u8 q2, [r0, #32] -; CHECK-NEXT: vldrb.u8 q3, [r0, #48] -; CHECK-NEXT: vmov.u16 r2, q1[3] -; CHECK-NEXT: vmov.16 q4[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.16 q4[1], r2 -; CHECK-NEXT: vmov.u16 r2, q0[3] -; CHECK-NEXT: vmov.16 q4[2], r2 -; CHECK-NEXT: vmov.u16 r2, q2[3] -; CHECK-NEXT: vmov.16 q5[4], r2 -; CHECK-NEXT: vmov.u16 r2, q2[7] -; CHECK-NEXT: vmov.16 q5[5], r2 +; CHECK-NEXT: vldrb.u8 q3, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0, #32] +; CHECK-NEXT: vldrb.u8 q1, [r0, #48] +; CHECK-NEXT: vldrb.u8 q2, [r0, #16] ; CHECK-NEXT: vmov.u16 r0, q3[3] -; CHECK-NEXT: vmov.16 q5[6], r0 -; CHECK-NEXT: vmov.u16 r0, q3[7] -; CHECK-NEXT: vmov.16 q5[7], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.16 q4[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.f32 s18, s22 -; CHECK-NEXT: vmov.f32 s19, s23 +; CHECK-NEXT: vmov.f32 s18, s1 ; CHECK-NEXT: vmov.16 q5[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vmov.u16 r0, q3[7] ; CHECK-NEXT: vmov.16 q5[1], r0 -; CHECK-NEXT: 
vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.u16 r0, q2[3] ; CHECK-NEXT: vmov.16 q5[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vins.f16 s18, s3 ; CHECK-NEXT: vmov.16 q6[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[6] +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.f32 s19, s5 ; CHECK-NEXT: vmov.16 q6[5], r0 -; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vins.f16 s19, s7 ; CHECK-NEXT: vmov.16 q6[6], r0 -; CHECK-NEXT: vmov.u16 r0, q3[6] +; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov.f32 s16, s13 ; CHECK-NEXT: vmov.16 q6[7], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov.u16 r0, q2[7] ; CHECK-NEXT: vmov.16 q5[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.f32 s22, s26 -; CHECK-NEXT: vmov.f32 s23, s27 -; CHECK-NEXT: vadd.i16 q4, q5, q4 -; CHECK-NEXT: vmov.16 q5[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.16 q5[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.16 q5[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[1] -; CHECK-NEXT: vmov.16 q6[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[5] -; CHECK-NEXT: vmov.16 q6[5], r0 +; CHECK-NEXT: vins.f16 s16, s15 +; CHECK-NEXT: vmov.f32 s17, s9 ; CHECK-NEXT: vmov.u16 r0, q3[1] -; CHECK-NEXT: vmov.16 q6[6], r0 -; CHECK-NEXT: vmov.u16 r0, q3[5] -; CHECK-NEXT: vmov.16 q6[7], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.16 q5[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] ; CHECK-NEXT: vmov.f32 s22, s26 +; CHECK-NEXT: vins.f16 s17, s11 ; CHECK-NEXT: vmov.f32 s23, s27 ; CHECK-NEXT: vmov.16 q6[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vadd.i16 q4, q4, q5 +; CHECK-NEXT: vmov.f64 d11, d0 +; CHECK-NEXT: vmov.u16 r0, q3[5] ; CHECK-NEXT: vmov.16 q6[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.u16 r0, q2[1] ; CHECK-NEXT: vmov.16 q6[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[4] -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: 
vmov.u16 r0, q3[0] -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov.u16 r0, q3[4] -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vins.f16 s22, s2 +; CHECK-NEXT: vmov.f32 s23, s4 +; CHECK-NEXT: vins.f16 s23, s6 +; CHECK-NEXT: vmov.f32 s20, s12 +; CHECK-NEXT: vins.f16 s20, s14 +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.f32 s21, s8 +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.16 q3[6], r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vmov.u16 r0, q2[5] ; CHECK-NEXT: vmov.16 q6[3], r0 -; CHECK-NEXT: vmov.f32 s26, s6 -; CHECK-NEXT: vmov.f32 s27, s7 -; CHECK-NEXT: vadd.i16 q0, q6, q5 +; CHECK-NEXT: vins.f16 s21, s10 +; CHECK-NEXT: vmov.f32 s26, s14 +; CHECK-NEXT: vmov.f32 s27, s15 +; CHECK-NEXT: vadd.i16 q0, q5, q6 ; CHECK-NEXT: vadd.i16 q0, q0, q4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} @@ -1099,27 +1079,16 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r0] ; CHECK-NEXT: vmovx.f16 s4, s3 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.16 q1[0], r2 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vadd.f16 q1, q2, q1 -; CHECK-NEXT: vmovx.f16 s8, s0 -; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmovx.f16 s8, s1 +; CHECK-NEXT: vins.f16 s1, s3 +; CHECK-NEXT: vins.f16 s8, s4 +; CHECK-NEXT: vmov.f32 s4, s1 +; CHECK-NEXT: vmovx.f16 s12, s0 +; CHECK-NEXT: vadd.f16 q1, q1, q2 ; CHECK-NEXT: vmovx.f16 s8, s2 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[1], r2 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vadd.f16 q0, q0, q2 +; CHECK-NEXT: 
vins.f16 s12, s8 +; CHECK-NEXT: vins.f16 s0, s2 +; CHECK-NEXT: vadd.f16 q0, q0, q3 ; CHECK-NEXT: vadd.f16 q0, q0, q1 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: str r0, [r1] @@ -1142,50 +1111,29 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vldrh.u16 q0, [r0, #16] -; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: vmovx.f16 s12, s5 -; CHECK-NEXT: vmov r3, s7 -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmov.16 q2[1], r3 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s7 -; CHECK-NEXT: vmovx.f16 s16, s1 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmov.16 q3[1], r2 -; CHECK-NEXT: vmovx.f16 s16, s3 -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmovx.f16 s16, s0 -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vadd.f16 q2, q2, q3 -; CHECK-NEXT: vmovx.f16 s12, s4 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s6 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.16 q3[1], r2 -; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vldrh.u16 q0, [r0] +; CHECK-NEXT: vmovx.f16 s8, s1 +; CHECK-NEXT: vins.f16 s1, s3 +; CHECK-NEXT: vmovx.f16 s4, s3 +; CHECK-NEXT: vmov.f32 s12, s1 +; CHECK-NEXT: vins.f16 s8, s4 +; CHECK-NEXT: vldrh.u16 q1, [r0, #16] +; CHECK-NEXT: vmovx.f16 s16, s7 +; CHECK-NEXT: vmovx.f16 s9, s5 +; CHECK-NEXT: vins.f16 s5, s7 +; CHECK-NEXT: vins.f16 s9, s16 +; CHECK-NEXT: vmov.f32 s13, s5 ; CHECK-NEXT: vmovx.f16 s16, s2 -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov.16 q1[1], r2 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmov.16 q1[3], 
r0 -; CHECK-NEXT: vadd.f16 q0, q1, q3 +; CHECK-NEXT: vadd.f16 q2, q3, q2 +; CHECK-NEXT: vmovx.f16 s12, s0 +; CHECK-NEXT: vins.f16 s12, s16 +; CHECK-NEXT: vins.f16 s0, s2 +; CHECK-NEXT: vmovx.f16 s16, s6 +; CHECK-NEXT: vmovx.f16 s13, s4 +; CHECK-NEXT: vins.f16 s4, s6 +; CHECK-NEXT: vins.f16 s13, s16 +; CHECK-NEXT: vmov.f32 s1, s4 +; CHECK-NEXT: vadd.f16 q0, q0, q3 ; CHECK-NEXT: vadd.f16 q0, q0, q2 ; CHECK-NEXT: vmov r2, s1 ; CHECK-NEXT: vmov r0, s0 @@ -1285,117 +1233,58 @@ define void @vld4_v8f16_align1(<32 x half> *%src, <8 x half> *%dst) { ; CHECK-LABEL: vld4_v8f16_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #64 -; CHECK-NEXT: sub sp, #64 -; CHECK-NEXT: vldrb.u8 q6, [r0] -; CHECK-NEXT: vldrb.u8 q2, [r0, #16] -; CHECK-NEXT: vldrb.u8 q4, [r0, #32] -; CHECK-NEXT: vldrb.u8 q5, [r0, #48] -; CHECK-NEXT: vmov r2, s24 -; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmov r3, s26 -; CHECK-NEXT: vmov.16 q0[1], r3 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vstrw.32 q0, [sp, #48] @ 16-byte Spill -; CHECK-NEXT: vmovx.f16 s0, s19 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s17 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmovx.f16 s0, s21 -; CHECK-NEXT: vmov.16 q1[4], r3 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[5], r2 -; CHECK-NEXT: vmovx.f16 s0, s23 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmovx.f16 s0, s25 -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmovx.f16 s0, s27 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmovx.f16 s0, s9 -; CHECK-NEXT: vmov.16 q3[1], r2 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmovx.f16 s0, s11 -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: 
vmov.16 q0[4], r0 -; CHECK-NEXT: vmov r2, s19 -; CHECK-NEXT: vmov.16 q0[5], r2 -; CHECK-NEXT: vmov r0, s21 -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov r0, s23 -; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vmov r0, s25 -; CHECK-NEXT: vmov.16 q7[0], r0 -; CHECK-NEXT: vmov r2, s27 -; CHECK-NEXT: vmov.16 q7[1], r2 -; CHECK-NEXT: vmov r0, s9 -; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q7[2], r0 -; CHECK-NEXT: vmov r0, s11 -; CHECK-NEXT: vmovx.f16 s0, s16 -; CHECK-NEXT: vmov.16 q7[3], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmovx.f16 s0, s18 -; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmovx.f16 s8, s20 -; CHECK-NEXT: vmov.16 q1[5], r2 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s22 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s24 -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s26 -; CHECK-NEXT: vmov.16 q6[0], r0 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmovx.f16 s8, s0 -; CHECK-NEXT: vmov.16 q6[1], r2 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmovx.f16 s8, s2 -; CHECK-NEXT: vmov.16 q6[2], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vldrw.u32 q2, [sp, #48] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q6[3], r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmov r2, s18 -; CHECK-NEXT: vmov.16 q4[4], r0 -; CHECK-NEXT: vmov.16 q4[5], r2 -; CHECK-NEXT: vmov r0, s20 -; CHECK-NEXT: vmov.16 q4[6], r0 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: vmov.16 q4[7], r0 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vldrw.u32 q5, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.f32 s26, s6 -; CHECK-NEXT: vmov.f32 s30, s2 -; CHECK-NEXT: vmov.f32 s31, s3 -; CHECK-NEXT: vmov q0, q2 -; CHECK-NEXT: vmov.f32 s14, s22 -; CHECK-NEXT: vmov.f32 
s2, s18 -; CHECK-NEXT: vmov.f32 s15, s23 -; CHECK-NEXT: vmov.f32 s27, s7 -; CHECK-NEXT: vadd.f16 q3, q7, q3 -; CHECK-NEXT: vmov.f32 s3, s19 -; CHECK-NEXT: vadd.f16 q0, q0, q6 -; CHECK-NEXT: vadd.f16 q0, q0, q3 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: vldrb.u8 q2, [r0, #32] +; CHECK-NEXT: vldrb.u8 q3, [r0, #48] +; CHECK-NEXT: vmovx.f16 s4, s11 +; CHECK-NEXT: vmovx.f16 s2, s9 +; CHECK-NEXT: vins.f16 s2, s4 +; CHECK-NEXT: vmovx.f16 s4, s15 +; CHECK-NEXT: vmovx.f16 s3, s13 +; CHECK-NEXT: vins.f16 s9, s11 +; CHECK-NEXT: vins.f16 s3, s4 +; CHECK-NEXT: vldrb.u8 q1, [r0] +; CHECK-NEXT: vmovx.f16 s28, s10 +; CHECK-NEXT: vmovx.f16 s26, s8 +; CHECK-NEXT: vmovx.f16 s16, s7 +; CHECK-NEXT: vmovx.f16 s0, s5 +; CHECK-NEXT: vins.f16 s0, s16 +; CHECK-NEXT: vldrb.u8 q4, [r0, #16] +; CHECK-NEXT: vins.f16 s13, s15 +; CHECK-NEXT: vins.f16 s26, s28 +; CHECK-NEXT: vmovx.f16 s20, s19 +; CHECK-NEXT: vmovx.f16 s1, s17 +; CHECK-NEXT: vins.f16 s1, s20 +; CHECK-NEXT: vmov.f32 s22, s9 +; CHECK-NEXT: vins.f16 s8, s10 +; CHECK-NEXT: vmov.f32 s23, s13 +; CHECK-NEXT: vmovx.f16 s28, s14 +; CHECK-NEXT: vmovx.f16 s27, s12 +; CHECK-NEXT: vmov.f32 s10, s8 +; CHECK-NEXT: vins.f16 s12, s14 +; CHECK-NEXT: vmov.f32 s11, s12 +; CHECK-NEXT: vins.f16 s27, s28 +; CHECK-NEXT: vins.f16 s5, s7 +; CHECK-NEXT: vmovx.f16 s28, s6 +; CHECK-NEXT: vmovx.f16 s24, s4 +; CHECK-NEXT: vmov.f32 s20, s5 +; CHECK-NEXT: vins.f16 s17, s19 +; CHECK-NEXT: vins.f16 s24, s28 +; CHECK-NEXT: vmov.f32 s21, s17 +; CHECK-NEXT: vmovx.f16 s28, s18 +; CHECK-NEXT: vmovx.f16 s25, s16 +; CHECK-NEXT: vins.f16 s4, s6 +; CHECK-NEXT: vins.f16 s16, s18 +; CHECK-NEXT: vins.f16 s25, s28 +; CHECK-NEXT: vmov.f32 s5, s16 +; CHECK-NEXT: vadd.f16 q0, q5, q0 +; CHECK-NEXT: vmov.f32 s6, s10 +; CHECK-NEXT: vmov.f32 s7, s11 +; CHECK-NEXT: vadd.f16 q1, q1, q6 +; CHECK-NEXT: vadd.f16 q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: add sp, #64 -; CHECK-NEXT: vpop {d8, d9, 
d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: bx lr entry: %l1 = load <32 x half>, <32 x half>* %src, align 1 Index: llvm/test/CodeGen/Thumb2/mve-vldst4.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vldst4.ll +++ llvm/test/CodeGen/Thumb2/mve-vldst4.ll @@ -4,19 +4,19 @@ define void @vldst4(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 { ; CHECK-LABEL: vldst4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #80 -; CHECK-NEXT: sub sp, #80 +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: mul r12, r3, r2 ; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: cmp.w r2, r12, lsr #2 ; CHECK-NEXT: beq.w .LBB0_3 ; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: mvn r3, #7 -; CHECK-NEXT: ldr r5, [sp, #160] +; CHECK-NEXT: ldr r2, [sp, #96] ; CHECK-NEXT: and.w r3, r3, r12, lsr #2 ; CHECK-NEXT: sub.w r12, r3, #8 ; CHECK-NEXT: movs r3, #1 @@ -24,215 +24,110 @@ ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u16 q5, [r0, #32] -; CHECK-NEXT: vldrh.u16 q3, [r0, #48] -; CHECK-NEXT: vldrh.u16 q7, [r0], #64 -; CHECK-NEXT: vmov r2, s20 -; CHECK-NEXT: vmovx.f16 s8, s12 -; CHECK-NEXT: vmov.16 q0[4], r2 -; CHECK-NEXT: vmov r3, s22 -; CHECK-NEXT: vmov.16 q0[5], r3 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov.16 q0[6], r2 -; CHECK-NEXT: vmov r2, s28 -; CHECK-NEXT: vldrh.u16 q6, [r0, #-48] -; CHECK-NEXT: vmov.16 q1[0], r2 -; CHECK-NEXT: vmov r3, s30 -; CHECK-NEXT: vmov.16 q1[1], r3 -; CHECK-NEXT: vmov r2, s24 -; CHECK-NEXT: vmov.16 q1[2], r2 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmov.16 
q0[7], r2 -; CHECK-NEXT: vmov r2, s26 -; CHECK-NEXT: vmov.16 q1[3], r2 -; CHECK-NEXT: vmov.f32 s6, s2 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmul.f16 q0, q1, r5 -; CHECK-NEXT: vmovx.f16 s4, s24 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vstrw.32 q0, [sp, #48] @ 16-byte Spill -; CHECK-NEXT: vmovx.f16 s0, s30 -; CHECK-NEXT: vmov r3, s3 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s28 -; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: vmov.16 q0[0], r4 -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmovx.f16 s4, s22 -; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmovx.f16 s4, s20 -; CHECK-NEXT: vmov r4, s4 -; CHECK-NEXT: vmov.16 q1[4], r4 -; CHECK-NEXT: vmov.16 q1[5], r2 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmovx.f16 s8, s14 -; CHECK-NEXT: vmov.16 q1[6], r2 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmovx.f16 s8, s26 -; CHECK-NEXT: vmov.16 q1[7], r2 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmov.16 q0[3], r2 -; CHECK-NEXT: vmovx.f16 s8, s13 -; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s3, s7 -; CHECK-NEXT: vmov.16 q1[0], r3 -; CHECK-NEXT: vmul.f16 q0, q0, r5 -; CHECK-NEXT: vmov r3, s23 -; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: vstrw.32 q0, [sp, #64] @ 16-byte Spill -; CHECK-NEXT: vmovx.f16 s0, s19 -; CHECK-NEXT: vmov.16 q1[1], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q1[4], r2 -; CHECK-NEXT: vmov r2, s21 -; CHECK-NEXT: vmov.16 q0[4], r2 -; CHECK-NEXT: vmov r2, s13 -; CHECK-NEXT: vmov.16 q0[5], r3 -; CHECK-NEXT: vmov r3, s29 -; CHECK-NEXT: vstrw.32 q1, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q0[6], r2 -; CHECK-NEXT: vmov r2, s31 -; CHECK-NEXT: vmov.16 q1[0], r3 -; CHECK-NEXT: vmov.16 q1[1], r2 -; CHECK-NEXT: vmov r2, s25 -; CHECK-NEXT: vmov.16 q1[2], r2 -; CHECK-NEXT: vmov r2, s15 -; CHECK-NEXT: vmov.16 q0[7], r2 -; CHECK-NEXT: vmov r2, s27 -; CHECK-NEXT: vmov.16 q1[3], r2 -; CHECK-NEXT: vmov.f32 s6, s2 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmovx.f16 s0, s31 -; 
CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s29 -; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: vmul.f16 q4, q1, r5 -; CHECK-NEXT: vmov.16 q0[0], r4 -; CHECK-NEXT: vmovx.f16 s4, s25 -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: vldrh.u16 q3, [r0, #32] +; CHECK-NEXT: vldrh.u16 q5, [r0, #48] +; CHECK-NEXT: vldrh.u16 q4, [r0], #64 +; CHECK-NEXT: vmovx.f16 s4, s15 +; CHECK-NEXT: vmovx.f16 s2, s13 +; CHECK-NEXT: vins.f16 s2, s4 ; CHECK-NEXT: vmovx.f16 s4, s23 -; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmovx.f16 s4, s21 -; CHECK-NEXT: vmov r4, s4 -; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q1[4], r4 -; CHECK-NEXT: vmov r3, s16 -; CHECK-NEXT: vmov.16 q1[5], r2 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmovx.f16 s8, s15 -; CHECK-NEXT: vmov.16 q1[6], r2 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmovx.f16 s8, s27 -; CHECK-NEXT: vmov.16 q1[7], r2 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmov.16 q0[3], r2 -; CHECK-NEXT: vldrw.u32 q2, [sp, #48] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s3, s7 -; CHECK-NEXT: vmov.16 q1[2], r3 -; CHECK-NEXT: vmul.f16 q6, q0, r5 -; CHECK-NEXT: vmovx.f16 s0, s16 -; CHECK-NEXT: vmov r2, s24 -; CHECK-NEXT: vmov.16 q1[3], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s24 -; CHECK-NEXT: vmov.16 q1[6], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s8 -; CHECK-NEXT: vmov.16 q1[7], r2 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q1, [sp, #64] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q5[0], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmovx.f16 s0, s4 -; CHECK-NEXT: vmov.16 q5[1], r3 -; CHECK-NEXT: vmov r3, s25 -; CHECK-NEXT: vmov.16 q5[4], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q5[5], r2 -; CHECK-NEXT: vmov r2, s17 -; CHECK-NEXT: vmov.16 q3[2], r2 +; CHECK-NEXT: vmovx.f16 s3, s21 +; CHECK-NEXT: vldrh.u16 q7, 
[r0, #-48] +; CHECK-NEXT: vins.f16 s3, s4 +; CHECK-NEXT: vmovx.f16 s4, s19 ; CHECK-NEXT: vmovx.f16 s0, s17 -; CHECK-NEXT: vmov.16 q3[3], r3 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s25 -; CHECK-NEXT: vmov.16 q3[6], r2 -; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vins.f16 s13, s15 +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vmovx.f16 s4, s31 +; CHECK-NEXT: vmovx.f16 s1, s29 +; CHECK-NEXT: vins.f16 s21, s23 +; CHECK-NEXT: vins.f16 s1, s4 +; CHECK-NEXT: vins.f16 s17, s19 +; CHECK-NEXT: vmul.f16 q1, q0, r2 +; CHECK-NEXT: vmov.f32 s2, s13 +; CHECK-NEXT: vmov.f32 s3, s21 +; CHECK-NEXT: vins.f16 s29, s31 +; CHECK-NEXT: vmov.f32 s0, s17 +; CHECK-NEXT: vmov.f32 s1, s29 +; CHECK-NEXT: vmul.f16 q2, q0, r2 +; CHECK-NEXT: vmovx.f16 s0, s4 +; CHECK-NEXT: vmovx.f16 s27, s8 +; CHECK-NEXT: vins.f16 s8, s4 +; CHECK-NEXT: vmov.f32 s25, s8 +; CHECK-NEXT: vmovx.f16 s4, s7 +; CHECK-NEXT: vins.f16 s27, s0 +; CHECK-NEXT: vmovx.f16 s2, s12 +; CHECK-NEXT: vstrw.32 q6, [sp] @ 16-byte Spill +; CHECK-NEXT: vmovx.f16 s24, s14 +; CHECK-NEXT: vins.f16 s2, s24 +; CHECK-NEXT: vmovx.f16 s24, s22 +; CHECK-NEXT: vmovx.f16 s3, s20 +; CHECK-NEXT: vins.f16 s12, s14 +; CHECK-NEXT: vins.f16 s3, s24 +; CHECK-NEXT: vmovx.f16 s24, s18 +; CHECK-NEXT: vmovx.f16 s0, s16 +; CHECK-NEXT: vins.f16 s20, s22 +; CHECK-NEXT: vins.f16 s0, s24 +; CHECK-NEXT: vmovx.f16 s24, s30 +; CHECK-NEXT: vmovx.f16 s1, s28 +; CHECK-NEXT: vins.f16 s16, s18 +; CHECK-NEXT: vins.f16 s1, s24 +; CHECK-NEXT: vins.f16 s28, s30 +; CHECK-NEXT: vmul.f16 q6, q0, r2 +; CHECK-NEXT: vmov.f32 s2, s12 +; CHECK-NEXT: vmov.f32 s3, s20 +; CHECK-NEXT: vmov.f32 s17, s28 +; CHECK-NEXT: vmov.f32 s18, s2 +; CHECK-NEXT: vmov.f32 s19, s3 +; CHECK-NEXT: vmovx.f16 s2, s24 +; CHECK-NEXT: vmul.f16 q5, q4, r2 +; CHECK-NEXT: vmovx.f16 s0, s20 +; CHECK-NEXT: vins.f16 s20, s24 +; CHECK-NEXT: vins.f16 s0, s2 +; CHECK-NEXT: vmov q3, q5 +; CHECK-NEXT: vmov.f32 s14, s0 ; CHECK-NEXT: vmovx.f16 s0, s9 -; CHECK-NEXT: vmov.16 q3[7], r2 -; CHECK-NEXT: vmov r2, s9 
-; CHECK-NEXT: vmov.16 q7[0], r2 -; CHECK-NEXT: vmov r3, s5 -; CHECK-NEXT: vmov.16 q7[1], r3 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s5 -; CHECK-NEXT: vmov.16 q7[4], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q7[5], r2 -; CHECK-NEXT: vmov r3, s26 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmovx.f16 s0, s2 -; CHECK-NEXT: vmov.16 q2[2], r2 -; CHECK-NEXT: vmov q4, q1 -; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q2[3], r3 -; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vins.f16 s9, s5 +; CHECK-NEXT: vmovx.f16 s2, s5 +; CHECK-NEXT: vins.f16 s0, s2 +; CHECK-NEXT: vmov q4, q2 +; CHECK-NEXT: vmov.f32 s19, s0 +; CHECK-NEXT: vmovx.f16 s31, s10 +; CHECK-NEXT: vmovx.f16 s18, s21 +; CHECK-NEXT: vins.f16 s21, s25 +; CHECK-NEXT: vins.f16 s10, s6 +; CHECK-NEXT: vmov.f32 s16, s21 +; CHECK-NEXT: vmovx.f16 s0, s25 +; CHECK-NEXT: vmov.f32 s29, s10 +; CHECK-NEXT: vins.f16 s18, s0 +; CHECK-NEXT: vmovx.f16 s0, s6 +; CHECK-NEXT: vins.f16 s31, s0 ; CHECK-NEXT: vmovx.f16 s0, s26 -; CHECK-NEXT: vmov.16 q2[6], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q2[7], r2 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov r3, s18 -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmovx.f16 s4, s6 -; CHECK-NEXT: vmov.16 q0[1], r3 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmovx.f16 s4, s18 -; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q0[4], r2 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.16 q0[5], r2 -; CHECK-NEXT: vmov r2, s19 -; CHECK-NEXT: vmov r3, s27 -; CHECK-NEXT: vmov.16 q1[2], r2 -; CHECK-NEXT: vmovx.f16 s16, s19 -; CHECK-NEXT: vmov.16 q1[3], r3 -; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: vmovx.f16 s16, s27 -; CHECK-NEXT: vmov.16 q1[6], r2 -; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: vldrw.u32 q4, [sp, #64] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q1[7], r2 -; CHECK-NEXT: vmov.f32 s1, s9 -; CHECK-NEXT: vldrw.u32 q6, [sp] @ 16-byte Reload -; CHECK-NEXT: 
vmovx.f16 s16, s19 -; CHECK-NEXT: vmov.f32 s3, s11 -; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: vldrw.u32 q4, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s21, s25 -; CHECK-NEXT: vstrh.16 q0, [r1, #32] -; CHECK-NEXT: vmov.16 q4[5], r2 -; CHECK-NEXT: vmov.f32 s29, s13 -; CHECK-NEXT: vmov q2, q4 -; CHECK-NEXT: vmov.f32 s23, s27 -; CHECK-NEXT: vmov.f32 s9, s5 -; CHECK-NEXT: vmov.f32 s11, s7 -; CHECK-NEXT: vstrh.16 q2, [r1, #48] -; CHECK-NEXT: vstrh.16 q5, [r1], #64 -; CHECK-NEXT: vmov.f32 s31, s15 -; CHECK-NEXT: vstrh.16 q7, [r1, #-48] +; CHECK-NEXT: vmovx.f16 s30, s22 +; CHECK-NEXT: vins.f16 s22, s26 +; CHECK-NEXT: vmov.f32 s28, s22 +; CHECK-NEXT: vins.f16 s30, s0 +; CHECK-NEXT: vmovx.f16 s3, s11 +; CHECK-NEXT: vins.f16 s11, s7 +; CHECK-NEXT: vstrh.16 q7, [r1, #32] +; CHECK-NEXT: vmov.f32 s1, s11 +; CHECK-NEXT: vins.f16 s3, s4 +; CHECK-NEXT: vmovx.f16 s4, s27 +; CHECK-NEXT: vmovx.f16 s2, s23 +; CHECK-NEXT: vins.f16 s23, s27 +; CHECK-NEXT: vmov.f32 s0, s23 +; CHECK-NEXT: vins.f16 s2, s4 +; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload +; CHECK-NEXT: vstrh.16 q0, [r1, #48] +; CHECK-NEXT: vmov.f32 s13, s5 +; CHECK-NEXT: vmov.f32 s15, s7 +; CHECK-NEXT: vstrh.16 q3, [r1], #64 +; CHECK-NEXT: vstrh.16 q4, [r1, #-48] ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: .LBB0_3: @ %while.end -; CHECK-NEXT: add sp, #80 +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: pop {r7, pc} entry: %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16 %l0 = bitcast i16 %tmp.0.extract.trunc to half Index: llvm/test/CodeGen/Thumb2/mve-vmovn.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vmovn.ll +++ llvm/test/CodeGen/Thumb2/mve-vmovn.ll @@ -393,45 +393,29 @@ define arm_aapcs_vfpcc <8 x i16> @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: vmovn16_b2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov q2, q0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; 
CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[4] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u16 r0, q2[6] -; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vmovx.f16 s9, s5 +; CHECK-NEXT: vins.f16 s9, s1 +; CHECK-NEXT: vmovx.f16 s8, s4 +; CHECK-NEXT: vins.f16 s8, s0 +; CHECK-NEXT: vmovx.f16 s10, s6 +; CHECK-NEXT: vins.f16 s10, s2 +; CHECK-NEXT: vmovx.f16 s11, s7 +; CHECK-NEXT: vins.f16 s11, s3 +; CHECK-NEXT: vmov q0, q2 ; CHECK-NEXT: bx lr ; ; CHECKBE-LABEL: vmovn16_b2: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vrev64.16 q2, q1 -; CHECKBE-NEXT: vrev64.16 q3, q0 -; CHECKBE-NEXT: vmov.u16 r0, q2[1] -; CHECKBE-NEXT: vmov.16 q1[0], r0 -; CHECKBE-NEXT: vmov.u16 r0, q3[0] -; CHECKBE-NEXT: vmov.16 q1[1], r0 -; CHECKBE-NEXT: vmov.u16 r0, q2[3] -; CHECKBE-NEXT: vmov.16 q1[2], r0 -; CHECKBE-NEXT: vmov.u16 r0, q3[2] -; CHECKBE-NEXT: vmov.16 q1[3], r0 -; CHECKBE-NEXT: vmov.u16 r0, q2[5] -; CHECKBE-NEXT: vmov.16 q1[4], r0 -; CHECKBE-NEXT: vmov.u16 r0, q3[4] -; CHECKBE-NEXT: vmov.16 q1[5], r0 -; CHECKBE-NEXT: vmov.u16 r0, q2[7] -; CHECKBE-NEXT: vmov.16 q1[6], r0 -; CHECKBE-NEXT: vmov.u16 r0, q3[6] -; CHECKBE-NEXT: vmov.16 q1[7], r0 +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: vmovx.f16 s5, s1 +; CHECKBE-NEXT: vins.f16 s5, s9 +; CHECKBE-NEXT: vmovx.f16 s4, s0 +; CHECKBE-NEXT: vins.f16 s4, s8 +; CHECKBE-NEXT: vmovx.f16 s6, s2 +; CHECKBE-NEXT: vins.f16 s6, s10 +; CHECKBE-NEXT: vmovx.f16 s7, s3 +; CHECKBE-NEXT: vins.f16 s7, s11 ; CHECKBE-NEXT: vrev64.16 q0, q1 ; CHECKBE-NEXT: bx lr entry: @@ -442,46 +426,30 @@ define arm_aapcs_vfpcc <8 x i16> @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2) 
{ ; CHECK-LABEL: vmovn16_b3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q2, q0 -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[3] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u16 r0, q2[5] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u16 r0, q2[7] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vmovx.f16 s1, s9 +; CHECK-NEXT: vins.f16 s1, s5 +; CHECK-NEXT: vmovx.f16 s0, s8 +; CHECK-NEXT: vins.f16 s0, s4 +; CHECK-NEXT: vmovx.f16 s2, s10 +; CHECK-NEXT: vins.f16 s2, s6 +; CHECK-NEXT: vmovx.f16 s3, s11 +; CHECK-NEXT: vins.f16 s3, s7 ; CHECK-NEXT: bx lr ; ; CHECKBE-LABEL: vmovn16_b3: ; CHECKBE: @ %bb.0: @ %entry ; CHECKBE-NEXT: vrev64.16 q3, q0 +; CHECKBE-NEXT: vrev64.16 q2, q1 +; CHECKBE-NEXT: vmovx.f16 s5, s13 +; CHECKBE-NEXT: vins.f16 s5, s9 +; CHECKBE-NEXT: vmovx.f16 s4, s12 +; CHECKBE-NEXT: vins.f16 s4, s8 +; CHECKBE-NEXT: vmovx.f16 s6, s14 +; CHECKBE-NEXT: vins.f16 s6, s10 +; CHECKBE-NEXT: vmovx.f16 s7, s15 +; CHECKBE-NEXT: vins.f16 s7, s11 ; CHECKBE-NEXT: vrev64.16 q0, q1 -; CHECKBE-NEXT: vmov.u16 r0, q3[1] -; CHECKBE-NEXT: vmov.16 q2[0], r0 -; CHECKBE-NEXT: vmov.u16 r0, q0[0] -; CHECKBE-NEXT: vmov.16 q2[1], r0 -; CHECKBE-NEXT: vmov.u16 r0, q3[3] -; CHECKBE-NEXT: vmov.16 q2[2], r0 -; CHECKBE-NEXT: vmov.u16 r0, q0[2] -; CHECKBE-NEXT: vmov.16 q2[3], r0 -; CHECKBE-NEXT: vmov.u16 r0, q3[5] -; CHECKBE-NEXT: vmov.16 q2[4], r0 -; CHECKBE-NEXT: vmov.u16 r0, q0[4] -; CHECKBE-NEXT: vmov.16 q2[5], r0 -; CHECKBE-NEXT: vmov.u16 r0, q3[7] -; CHECKBE-NEXT: vmov.16 q2[6], r0 -; CHECKBE-NEXT: vmov.u16 r0, q0[6] -; CHECKBE-NEXT: vmov.16 q2[7], r0 -; CHECKBE-NEXT: vrev64.16 q0, q2 ; CHECKBE-NEXT: bx lr entry: %out = shufflevector <8 x i16> %src1, <8 x 
i16> %src2, <8 x i32> Index: llvm/test/CodeGen/Thumb2/mve-vmovnstore.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vmovnstore.ll +++ llvm/test/CodeGen/Thumb2/mve-vmovnstore.ll @@ -314,22 +314,14 @@ define arm_aapcs_vfpcc void @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2, <8 x i16> *%dest) { ; CHECK-LABEL: vmovn16_b2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.u16 r1, q1[1] -; CHECK-NEXT: vmov.16 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q0[0] -; CHECK-NEXT: vmov.16 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q1[3] -; CHECK-NEXT: vmov.16 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q0[2] -; CHECK-NEXT: vmov.16 q2[3], r1 -; CHECK-NEXT: vmov.u16 r1, q1[5] -; CHECK-NEXT: vmov.16 q2[4], r1 -; CHECK-NEXT: vmov.u16 r1, q0[4] -; CHECK-NEXT: vmov.16 q2[5], r1 -; CHECK-NEXT: vmov.u16 r1, q1[7] -; CHECK-NEXT: vmov.16 q2[6], r1 -; CHECK-NEXT: vmov.u16 r1, q0[6] -; CHECK-NEXT: vmov.16 q2[7], r1 +; CHECK-NEXT: vmovx.f16 s9, s5 +; CHECK-NEXT: vins.f16 s9, s1 +; CHECK-NEXT: vmovx.f16 s8, s4 +; CHECK-NEXT: vins.f16 s8, s0 +; CHECK-NEXT: vmovx.f16 s10, s6 +; CHECK-NEXT: vins.f16 s10, s2 +; CHECK-NEXT: vmovx.f16 s11, s7 +; CHECK-NEXT: vins.f16 s11, s3 ; CHECK-NEXT: vstrw.32 q2, [r0] ; CHECK-NEXT: bx lr entry: @@ -341,22 +333,14 @@ define arm_aapcs_vfpcc void @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2, <8 x i16> *%dest) { ; CHECK-LABEL: vmovn16_b3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.u16 r1, q0[1] -; CHECK-NEXT: vmov.16 q2[0], r1 -; CHECK-NEXT: vmov.u16 r1, q1[0] -; CHECK-NEXT: vmov.16 q2[1], r1 -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: vmov.16 q2[2], r1 -; CHECK-NEXT: vmov.u16 r1, q1[2] -; CHECK-NEXT: vmov.16 q2[3], r1 -; CHECK-NEXT: vmov.u16 r1, q0[5] -; CHECK-NEXT: vmov.16 q2[4], r1 -; CHECK-NEXT: vmov.u16 r1, q1[4] -; CHECK-NEXT: vmov.16 q2[5], r1 -; CHECK-NEXT: vmov.u16 r1, q0[7] -; CHECK-NEXT: vmov.16 q2[6], r1 -; CHECK-NEXT: vmov.u16 r1, q1[6] -; CHECK-NEXT: vmov.16 q2[7], r1 +; CHECK-NEXT: vmovx.f16 s9, s1 +; 
CHECK-NEXT: vins.f16 s9, s5 +; CHECK-NEXT: vmovx.f16 s8, s0 +; CHECK-NEXT: vins.f16 s8, s4 +; CHECK-NEXT: vmovx.f16 s10, s2 +; CHECK-NEXT: vins.f16 s10, s6 +; CHECK-NEXT: vmovx.f16 s11, s3 +; CHECK-NEXT: vins.f16 s11, s7 ; CHECK-NEXT: vstrw.32 q2, [r0] ; CHECK-NEXT: bx lr entry: Index: llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll +++ llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll @@ -140,31 +140,23 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_interleaved(<8 x i16> %s0, <8 x i16> %s1) { ; CHECK-LABEL: vqdmulh_i16_interleaved: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.f32 s9, s2 ; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vins.f16 s9, s3 +; CHECK-NEXT: vmov.f32 s8, s0 +; CHECK-NEXT: vins.f16 s8, s1 ; CHECK-NEXT: vmov.16 q2[4], r0 ; CHECK-NEXT: vmov.u16 r0, q0[3] ; CHECK-NEXT: vmov.16 q2[5], r0 ; CHECK-NEXT: vmov.u16 r0, q0[5] ; CHECK-NEXT: vmov.16 q2[6], r0 ; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.f32 s1, s6 ; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.16 q0[3], r0 +; CHECK-NEXT: vins.f16 s1, s7 ; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.f32 s0, s4 +; CHECK-NEXT: vins.f16 s0, s5 ; CHECK-NEXT: vmov.16 q0[4], r0 ; CHECK-NEXT: vmov.u16 r0, q1[3] ; CHECK-NEXT: vmov.16 q0[5], r0 @@ -173,18 +165,15 @@ ; CHECK-NEXT: vmov.u16 r0, q1[7] ; CHECK-NEXT: vmov.16 q0[7], r0 ; CHECK-NEXT: vqdmulh.s16 q1, q0, q2 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; 
CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vmov.f32 s0, s4 ; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vins.f16 s0, s6 +; CHECK-NEXT: vmov.f32 s8, s5 ; CHECK-NEXT: vmov.16 q0[2], r0 ; CHECK-NEXT: vmov.u16 r0, q1[5] ; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.16 q0[5], r0 +; CHECK-NEXT: vins.f16 s8, s7 +; CHECK-NEXT: vmov.f32 s2, s8 ; CHECK-NEXT: vmov.u16 r0, q1[3] ; CHECK-NEXT: vmov.16 q0[6], r0 ; CHECK-NEXT: vmov.u16 r0, q1[7] Index: llvm/test/CodeGen/Thumb2/mve-vst2.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vst2.ll +++ llvm/test/CodeGen/Thumb2/mve-vst2.ll @@ -208,40 +208,32 @@ ; CHECK-LABEL: vst2_v8i16_align1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vldrw.u32 q2, [r0, #16] -; CHECK-NEXT: vmov.u16 r2, q1[4] -; CHECK-NEXT: vmov.u16 r0, q2[4] -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vldrw.u32 q0, [r0, #16] +; CHECK-NEXT: vmov.f64 d4, d3 ; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[5] -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[6] -; CHECK-NEXT: vmov.16 q0[5], r0 +; CHECK-NEXT: vins.f16 s8, s2 +; CHECK-NEXT: vmov.f64 d6, d2 +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.16 q2[3], r0 ; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u16 r0, q2[7] -; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmov.16 q3[1], r0 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vins.f16 s12, s0 +; CHECK-NEXT: vins.f16 s10, s3 +; CHECK-NEXT: vmov.16 q2[6], r0 +; 
CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov.16 q2[7], r0 ; CHECK-NEXT: vmov.u16 r0, q1[1] ; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q2[1] +; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] -; CHECK-NEXT: vmov.16 q3[5], r0 ; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.f32 s14, s5 +; CHECK-NEXT: vstrb.8 q2, [r1, #16] +; CHECK-NEXT: vins.f16 s14, s1 ; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u16 r0, q2[3] +; CHECK-NEXT: vmov.u16 r0, q0[3] ; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vstrb.8 q0, [r1, #16] ; CHECK-NEXT: vstrb.8 q3, [r1] ; CHECK-NEXT: bx lr entry: @@ -522,20 +514,14 @@ ; CHECK-LABEL: vst2_v2f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: ldrd r2, r0, [r0] -; CHECK-NEXT: vmov.32 q0[0], r2 -; CHECK-NEXT: vmov.32 q1[0], r0 -; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmovx.f16 s5, s4 +; CHECK-NEXT: vins.f16 s4, s0 ; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vins.f16 s5, s0 ; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmovx.f16 s0, s4 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov r2, s9 +; CHECK-NEXT: vmov r2, s5 ; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: str r2, [r1, #4] ; CHECK-NEXT: bx lr @@ -552,33 +538,24 @@ define void @vst2_v4f16(<4 x half> *%src, <8 x half> *%dst) { ; CHECK-LABEL: vst2_v4f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldm.w r0, {r2, r3, r12} -; CHECK-NEXT: vmov.32 q0[0], r12 -; CHECK-NEXT: ldr r0, [r0, #12] -; CHECK-NEXT: vmov.32 q2[0], r2 -; CHECK-NEXT: vmov.32 q2[1], r3 +; CHECK-NEXT: ldrd r2, r12, [r0] +; CHECK-NEXT: ldrd r3, r0, [r0, #8] +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov.32 q0[0], r3 +; CHECK-NEXT: vmov.32 q1[1], r12 
; CHECK-NEXT: vmov.32 q0[1], r0 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmovx.f16 s12, s8 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[0], r2 -; CHECK-NEXT: vmov.16 q1[1], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s0 -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s8, s9 -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov r0, s9 -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmovx.f16 s12, s4 +; CHECK-NEXT: vmovx.f16 s8, s0 +; CHECK-NEXT: vins.f16 s4, s0 +; CHECK-NEXT: vins.f16 s12, s8 +; CHECK-NEXT: vmov q2, q1 +; CHECK-NEXT: vmov.f32 s9, s12 ; CHECK-NEXT: vmovx.f16 s0, s1 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vstrh.16 q1, [r1] +; CHECK-NEXT: vmovx.f16 s11, s5 +; CHECK-NEXT: vins.f16 s5, s1 +; CHECK-NEXT: vmov.f32 s10, s5 +; CHECK-NEXT: vins.f16 s11, s0 +; CHECK-NEXT: vstrh.16 q2, [r1] ; CHECK-NEXT: bx lr entry: %s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0 @@ -633,49 +610,30 @@ define void @vst2_v8f16_align1(<8 x half> *%src, <16 x half> *%dst) { ; CHECK-LABEL: vst2_v8f16_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmovx.f16 s12, s6 -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmovx.f16 s9, s6 +; CHECK-NEXT: vins.f16 s6, s2 +; CHECK-NEXT: vmov.f32 s8, s6 ; CHECK-NEXT: vmovx.f16 s12, s2 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s7 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vins.f16 s9, s12 ; CHECK-NEXT: 
vmovx.f16 s12, s3 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vmovx.f16 s11, s7 +; CHECK-NEXT: vins.f16 s7, s3 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vins.f16 s11, s12 ; CHECK-NEXT: vmovx.f16 s12, s4 -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov r2, s4 ; CHECK-NEXT: vstrb.8 q2, [r1, #16] -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmovx.f16 s4, s5 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s0 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vmovx.f16 s8, s0 +; CHECK-NEXT: vins.f16 s4, s0 +; CHECK-NEXT: vins.f16 s12, s8 +; CHECK-NEXT: vmov q2, q1 ; CHECK-NEXT: vmovx.f16 s0, s1 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s5 -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vmov.f32 s9, s12 +; CHECK-NEXT: vmovx.f16 s11, s5 +; CHECK-NEXT: vins.f16 s5, s1 +; CHECK-NEXT: vmov.f32 s10, s5 +; CHECK-NEXT: vins.f16 s11, s0 ; CHECK-NEXT: vstrb.8 q2, [r1] ; CHECK-NEXT: bx lr entry: Index: llvm/test/CodeGen/Thumb2/mve-vst3.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -385,78 +385,74 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vldrw.u32 q1, [r0, #32] -; CHECK-NEXT: vldrw.u32 q2, [r0] -; CHECK-NEXT: vldrw.u32 q4, [r0, #16] -; CHECK-NEXT: vmov.u16 r2, q1[2] -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmov.u16 r2, q2[3] -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[4] -; CHECK-NEXT: vmov.16 q0[6], r2 -; CHECK-NEXT: vmov.u16 r2, q2[5] -; CHECK-NEXT: vmov.16 q0[7], r2 -; CHECK-NEXT: vmov.u16 r2, 
q2[0] -; CHECK-NEXT: vmov.16 q3[0], r2 -; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q4[1] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u16 r0, q4[2] -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.16 q3[7], r0 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] +; CHECK-NEXT: vldrw.u32 q2, [r0, #16] +; CHECK-NEXT: vmov.f64 d8, d2 +; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vmov.16 q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q3[4] +; CHECK-NEXT: vmov.16 q0[6], r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vins.f16 s16, s8 +; CHECK-NEXT: vmov.u16 r0, q2[1] +; CHECK-NEXT: vmov r2, s12 +; CHECK-NEXT: vmov.16 q4[4], r0 +; CHECK-NEXT: vins.f16 s5, s9 +; CHECK-NEXT: vmov.f32 s19, s5 ; CHECK-NEXT: vdup.32 q5, r2 -; CHECK-NEXT: vmov.f32 s13, s8 +; CHECK-NEXT: vmov.f32 s17, s4 ; CHECK-NEXT: vmov.u16 r2, q5[2] -; CHECK-NEXT: vmov.u16 r0, q3[3] ; CHECK-NEXT: vmov.16 q6[2], r2 -; CHECK-NEXT: vmov r2, s11 +; CHECK-NEXT: vmov.u16 r0, q4[3] ; CHECK-NEXT: vmov.16 q6[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[4] +; CHECK-NEXT: vmov.u16 r0, q4[4] ; CHECK-NEXT: vmov.16 q6[4], r0 ; CHECK-NEXT: vmov.u16 r0, q5[5] ; CHECK-NEXT: vmov.16 q6[5], r0 -; CHECK-NEXT: vmov.u16 r0, q4[5] +; CHECK-NEXT: vmov.u16 r0, q2[5] ; CHECK-NEXT: vmov.16 q5[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.u16 r0, q3[5] ; CHECK-NEXT: vmov.16 q5[1], r0 -; CHECK-NEXT: vmov.u16 r0, q4[6] +; CHECK-NEXT: vmov.u16 r0, q2[6] ; CHECK-NEXT: vmov.16 q5[3], r0 -; CHECK-NEXT: vmov.u16 r0, q4[7] -; CHECK-NEXT: vmov.f32 s13, s25 +; CHECK-NEXT: vmov.u16 r0, q2[7] +; CHECK-NEXT: vmov.f32 s17, s25 ; CHECK-NEXT: vmov.16 q5[6], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vmov.f32 s14, s26 +; CHECK-NEXT: vmov.u16 r0, q3[7] +; CHECK-NEXT: vmov r2, s7 +; CHECK-NEXT: 
vmov.f32 s18, s26 ; CHECK-NEXT: vmov.16 q5[7], r0 ; CHECK-NEXT: vdup.32 q6, r2 -; CHECK-NEXT: vmov.f32 s1, s5 +; CHECK-NEXT: vmov.f32 s22, s15 ; CHECK-NEXT: vmov.u16 r2, q6[2] -; CHECK-NEXT: vmov.f32 s22, s7 -; CHECK-NEXT: vrev32.16 q4, q4 -; CHECK-NEXT: vmov.16 q7[2], r2 ; CHECK-NEXT: vmov.u16 r0, q5[3] -; CHECK-NEXT: vmov.u16 r2, q4[2] -; CHECK-NEXT: vmov.f32 s2, s10 +; CHECK-NEXT: vmov.16 q7[2], r2 +; CHECK-NEXT: vmov.f32 s1, s13 ; CHECK-NEXT: vmov.16 q7[3], r0 ; CHECK-NEXT: vmov.u16 r0, q5[4] -; CHECK-NEXT: vstrw.32 q3, [r1] -; CHECK-NEXT: vmov.16 q3[2], r2 -; CHECK-NEXT: vmov.u16 r2, q0[3] +; CHECK-NEXT: vrev32.16 q2, q2 ; CHECK-NEXT: vmov.16 q7[4], r0 ; CHECK-NEXT: vmov.u16 r0, q6[5] -; CHECK-NEXT: vmov.16 q3[3], r2 -; CHECK-NEXT: vmov.u16 r2, q0[4] +; CHECK-NEXT: vmov.u16 r2, q2[2] +; CHECK-NEXT: vmov.f32 s2, s6 ; CHECK-NEXT: vmov.16 q7[5], r0 -; CHECK-NEXT: vmov.u16 r0, q4[5] -; CHECK-NEXT: vmov.16 q3[4], r2 -; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u16 r0, q2[5] +; CHECK-NEXT: vmov.16 q2[2], r2 +; CHECK-NEXT: vmov.u16 r2, q0[3] ; CHECK-NEXT: vmov.f32 s21, s29 -; CHECK-NEXT: vmov.f32 s1, s13 +; CHECK-NEXT: vmov.16 q2[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[4] +; CHECK-NEXT: vmov.16 q2[4], r2 ; CHECK-NEXT: vmov.f32 s22, s30 -; CHECK-NEXT: vmov.f32 s2, s14 +; CHECK-NEXT: vmov.16 q2[5], r0 ; CHECK-NEXT: vstrw.32 q5, [r1, #32] +; CHECK-NEXT: vmov.f32 s1, s9 +; CHECK-NEXT: vstrw.32 q4, [r1] +; CHECK-NEXT: vmov.f32 s2, s10 ; CHECK-NEXT: vstrw.32 q0, [r1, #16] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr @@ -482,181 +478,173 @@ ; CHECK-NEXT: .pad #160 ; CHECK-NEXT: sub sp, #160 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-NEXT: vmov.u16 r2, q0[0] +; CHECK-NEXT: vldrw.u32 q7, [r0, #48] +; CHECK-NEXT: vldrw.u32 q1, [r0, #32] +; CHECK-NEXT: vmov.f64 d4, d0 +; CHECK-NEXT: vmov.u16 r2, q7[1] ; CHECK-NEXT: vstrw.32 q0, [sp, #128] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q2[0], r2 -; 
CHECK-NEXT: vmov.u16 r2, q1[0] -; CHECK-NEXT: vmov.16 q2[1], r2 -; CHECK-NEXT: vmov.u16 r2, q1[1] +; CHECK-NEXT: vstrw.32 q1, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vins.f16 s8, s28 ; CHECK-NEXT: vmov.16 q2[4], r2 -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.16 q2[6], r2 -; CHECK-NEXT: vmov.u16 r2, q1[2] -; CHECK-NEXT: vmov.16 q2[7], r2 -; CHECK-NEXT: vmov q7, q1 -; CHECK-NEXT: vmov.f32 s9, s0 +; CHECK-NEXT: vmov.f32 s11, s1 +; CHECK-NEXT: vins.f16 s11, s29 +; CHECK-NEXT: vmov q4, q2 +; CHECK-NEXT: vmov.f32 s17, s0 ; CHECK-NEXT: vldrw.u32 q0, [r0, #80] -; CHECK-NEXT: vmov.u16 r2, q2[3] -; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vmov.u16 r2, q4[3] +; CHECK-NEXT: vstrw.32 q4, [sp, #80] @ 16-byte Spill ; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmov q2, q0 ; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: vstrw.32 q2, [sp, #96] @ 16-byte Spill ; CHECK-NEXT: vmov.u16 r3, q0[2] -; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q2[2], r3 -; CHECK-NEXT: vmov.16 q2[3], r2 -; CHECK-NEXT: vmov.u16 r2, q3[4] -; CHECK-NEXT: vldrw.u32 q3, [r0, #32] -; CHECK-NEXT: vmov.16 q2[4], r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vldrw.u32 q0, [r0, #64] -; CHECK-NEXT: vmov.16 q2[5], r2 -; CHECK-NEXT: vmov.u16 r2, q3[5] -; CHECK-NEXT: vmov.16 q5[0], r2 +; CHECK-NEXT: vmov.16 q3[2], r3 +; CHECK-NEXT: vmov.16 q3[3], r2 +; CHECK-NEXT: vmov.u16 r2, q4[4] +; CHECK-NEXT: vmov.16 q3[4], r2 ; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.16 q5[1], r2 -; CHECK-NEXT: vmov.u16 r2, q3[6] -; CHECK-NEXT: vmov.16 q5[3], r2 -; CHECK-NEXT: vmov.u16 r2, q3[7] -; CHECK-NEXT: vmov.16 q5[6], r2 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.16 q5[7], r2 -; CHECK-NEXT: vstrw.32 q0, [sp, #144] @ 16-byte Spill -; CHECK-NEXT: vmov.f32 s22, s3 ; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q2, [sp, #112] @ 16-byte Spill -; CHECK-NEXT: vmov.u16 r2, q5[3] -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov q2, q0 +; CHECK-NEXT: vmov.16 
q3[5], r2 +; CHECK-NEXT: vmov.u16 r2, q1[1] +; CHECK-NEXT: vstrw.32 q3, [sp, #96] @ 16-byte Spill +; CHECK-NEXT: vmov.f64 d12, d0 +; CHECK-NEXT: vstrw.32 q0, [sp, #144] @ 16-byte Spill +; CHECK-NEXT: vins.f16 s24, s4 +; CHECK-NEXT: vmov.16 q6[4], r2 +; CHECK-NEXT: vmov.f32 s27, s1 +; CHECK-NEXT: vins.f16 s27, s5 +; CHECK-NEXT: vmov.f32 s25, s0 +; CHECK-NEXT: vldrw.u32 q0, [r0, #64] +; CHECK-NEXT: vmov.u16 r2, q6[3] +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov q5, q0 ; CHECK-NEXT: vdup.32 q0, r0 -; CHECK-NEXT: vstrw.32 q2, [sp, #48] @ 16-byte Spill +; CHECK-NEXT: vstrw.32 q5, [sp, #112] @ 16-byte Spill ; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.16 q4[2], r0 -; CHECK-NEXT: vmov.u16 r0, q5[4] -; CHECK-NEXT: vmov.16 q4[3], r2 -; CHECK-NEXT: vmov.16 q4[4], r0 +; CHECK-NEXT: vmov.16 q3[2], r0 +; CHECK-NEXT: vmov.u16 r0, q6[4] +; CHECK-NEXT: vmov.16 q3[3], r2 +; CHECK-NEXT: vmov.16 q3[4], r0 ; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.16 q4[5], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmov.16 q6[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[0] +; CHECK-NEXT: vmov.16 q3[5], r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.16 q4[0], r0 +; CHECK-NEXT: vmov.u16 r0, q5[5] ; CHECK-NEXT: vldrw.u32 q0, [sp, #144] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q6[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[1] -; CHECK-NEXT: vstrw.32 q4, [sp, #80] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q6[4], r0 -; CHECK-NEXT: vmov.u16 r0, q2[2] -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q6[6], r0 -; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vmov.16 q4[1], r0 +; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vstrw.32 q3, [sp, #64] @ 16-byte Spill +; CHECK-NEXT: vmov.16 q4[3], r0 +; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: vmov.16 q4[6], r0 +; CHECK-NEXT: vmov.u16 r0, q5[7] ; CHECK-NEXT: vdup.32 q0, r2 -; CHECK-NEXT: vmov.16 q6[7], r0 +; CHECK-NEXT: vmov.16 q4[7], r0 ; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.f32 s25, s8 -; 
CHECK-NEXT: vmov.16 q2[2], r2 -; CHECK-NEXT: vmov.u16 r0, q6[3] -; CHECK-NEXT: vmov q4, q1 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q6[4] -; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov.f32 s18, s23 +; CHECK-NEXT: vmov.16 q1[2], r2 +; CHECK-NEXT: vmov.u16 r0, q4[3] +; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u16 r0, q4[4] +; CHECK-NEXT: vmov.16 q1[4], r0 ; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vmov.16 q1[5], r0 ; CHECK-NEXT: vmov.u16 r0, q7[5] -; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vstrw.32 q1, [sp, #48] @ 16-byte Spill +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q2[5] +; CHECK-NEXT: vstrw.32 q3, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.16 q1[1], r0 ; CHECK-NEXT: vmov.u16 r0, q7[6] -; CHECK-NEXT: vmov.16 q0[3], r0 +; CHECK-NEXT: vmov.16 q1[3], r0 ; CHECK-NEXT: vmov.u16 r0, q7[7] -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vldrw.u32 q1, [sp, #128] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vmov.f32 s2, s19 -; CHECK-NEXT: vstrw.32 q2, [sp, #64] @ 16-byte Spill -; CHECK-NEXT: vmov r2, s7 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vdup.32 q7, r2 -; CHECK-NEXT: vrev32.16 q3, q3 -; CHECK-NEXT: vmov.u16 r2, q7[2] -; CHECK-NEXT: vstrw.32 q3, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q2[2], r2 -; CHECK-NEXT: vstrw.32 q4, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov.u16 r0, q7[5] -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov.u16 r0, q4[2] -; CHECK-NEXT: vmov.16 q3[0], r0 +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u16 r0, q2[7] +; CHECK-NEXT: vldrw.u32 q2, [sp, #128] @ 16-byte Reload +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmov.f32 s6, s15 +; CHECK-NEXT: vmov r2, s11 ; CHECK-NEXT: vmov.u16 r0, 
q1[3] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q4[4] -; CHECK-NEXT: vldrw.u32 q7, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vldrw.u32 q1, [sp, #144] @ 16-byte Reload -; CHECK-NEXT: vldrw.u32 q4, [sp, #48] @ 16-byte Reload -; CHECK-NEXT: vrev32.16 q7, q7 -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill +; CHECK-NEXT: vdup.32 q0, r2 +; CHECK-NEXT: vmov.u16 r2, q0[2] +; CHECK-NEXT: vmov.16 q5[2], r2 +; CHECK-NEXT: vmov.16 q5[3], r0 +; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.16 q5[4], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.16 q5[5], r0 +; CHECK-NEXT: vmov.u16 r0, q3[2] +; CHECK-NEXT: vrev32.16 q0, q7 ; CHECK-NEXT: vmov.16 q7[0], r0 -; CHECK-NEXT: vmov.u16 r0, q4[3] -; CHECK-NEXT: vmov.f32 s1, s9 +; CHECK-NEXT: vmov.u16 r0, q2[3] +; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vmov.16 q7[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vldrw.u32 q1, [sp, #64] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s2, s10 -; CHECK-NEXT: vldrw.u32 q2, [sp, #96] @ 16-byte Reload +; CHECK-NEXT: vmov.u16 r0, q3[4] +; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload ; CHECK-NEXT: vmov.16 q7[6], r0 -; CHECK-NEXT: vmov.f32 s25, s5 -; CHECK-NEXT: vmov.u16 r0, q4[5] -; CHECK-NEXT: vmov.f32 s26, s6 -; CHECK-NEXT: vldrw.u32 q1, [sp, #80] @ 16-byte Reload -; CHECK-NEXT: vstrw.32 q0, [r1, #80] -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s21, s5 +; CHECK-NEXT: vmov.u16 r0, q2[5] +; CHECK-NEXT: vldrw.u32 q2, [sp, #112] @ 16-byte Reload +; CHECK-NEXT: vldrw.u32 q3, [sp, #144] @ 16-byte Reload +; CHECK-NEXT: vrev32.16 q0, q0 ; CHECK-NEXT: vmov.16 q7[7], r0 -; CHECK-NEXT: vmov.f32 s22, s6 -; CHECK-NEXT: vldrw.u32 q1, [sp, #112] @ 16-byte Reload +; CHECK-NEXT: vmov.u16 r0, q2[2] +; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vmov.16 
q0[0], r0 +; CHECK-NEXT: vmov.u16 r0, q3[3] +; CHECK-NEXT: vmov.f32 s5, s21 +; CHECK-NEXT: vmov.16 q0[1], r0 +; CHECK-NEXT: vmov.u16 r0, q2[4] +; CHECK-NEXT: vldrw.u32 q2, [sp, #64] @ 16-byte Reload +; CHECK-NEXT: vmov.16 q0[6], r0 +; CHECK-NEXT: vmov.u16 r0, q3[5] +; CHECK-NEXT: vldrw.u32 q3, [sp, #80] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s25, s9 +; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vmov.f32 s26, s10 +; CHECK-NEXT: vldrw.u32 q2, [sp, #96] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s6, s22 +; CHECK-NEXT: vstrw.32 q6, [r1] +; CHECK-NEXT: vmov.f32 s13, s9 +; CHECK-NEXT: vstrw.32 q1, [r1, #80] +; CHECK-NEXT: vmov.f32 s14, s10 +; CHECK-NEXT: vldrw.u32 q2, [sp, #48] @ 16-byte Reload +; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vstrw.32 q3, [r1, #48] +; CHECK-NEXT: vmov.f32 s17, s9 +; CHECK-NEXT: vmov.f32 s18, s10 +; CHECK-NEXT: vldrw.u32 q2, [sp, #112] @ 16-byte Reload +; CHECK-NEXT: vmov.u16 r2, q1[2] +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.f32 s1, s9 +; CHECK-NEXT: vldrw.u32 q2, [sp, #144] @ 16-byte Reload +; CHECK-NEXT: vmov.16 q1[2], r2 +; CHECK-NEXT: vstrw.32 q4, [r1, #32] +; CHECK-NEXT: vmov.f32 s2, s10 +; CHECK-NEXT: vmov.u16 r2, q0[3] +; CHECK-NEXT: vmov.16 q1[3], r2 +; CHECK-NEXT: vmov.u16 r2, q0[4] +; CHECK-NEXT: vmov.16 q1[4], r2 +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.f32 s1, s5 +; CHECK-NEXT: vmov.f32 s2, s6 +; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload +; CHECK-NEXT: vstrw.32 q0, [r1, #16] +; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s29, s5 +; CHECK-NEXT: vldrw.u32 q1, [sp, #128] @ 16-byte Reload ; CHECK-NEXT: vmov.u16 r2, q0[2] ; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.f32 s9, s5 +; CHECK-NEXT: vmov.f32 s30, s6 ; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vmov.f32 s10, s6 -; CHECK-NEXT: vldrw.u32 q1, [sp, #144] @ 16-byte Reload -; CHECK-NEXT: vstrw.32 q5, [r1, #32] -; CHECK-NEXT: vstrw.32 q2, [r1, #48] -; CHECK-NEXT: vmov.f32 s29, s5 -; 
CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s30, s18 -; CHECK-NEXT: vstrw.32 q6, [r1] ; CHECK-NEXT: vmov.u16 r2, q7[3] -; CHECK-NEXT: vmov.f32 s13, s5 ; CHECK-NEXT: vmov.16 q0[3], r2 ; CHECK-NEXT: vmov.u16 r2, q7[4] ; CHECK-NEXT: vmov.16 q0[4], r2 -; CHECK-NEXT: vldrw.u32 q1, [sp, #128] @ 16-byte Reload ; CHECK-NEXT: vmov.16 q0[5], r0 ; CHECK-NEXT: vmov.f32 s29, s1 ; CHECK-NEXT: vmov.f32 s30, s2 -; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s14, s6 -; CHECK-NEXT: vstrw.32 q7, [r1, #16] -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vmov.u16 r2, q3[3] -; CHECK-NEXT: vmov.16 q0[3], r2 -; CHECK-NEXT: vmov.u16 r2, q3[4] -; CHECK-NEXT: vmov.16 q0[4], r2 -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.f32 s13, s1 -; CHECK-NEXT: vmov.f32 s14, s2 -; CHECK-NEXT: vstrw.32 q3, [r1, #64] +; CHECK-NEXT: vstrw.32 q7, [r1, #64] ; CHECK-NEXT: add sp, #160 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr @@ -1417,27 +1405,20 @@ define void @vst3_v2f16(<2 x half> *%src, <6 x half> *%dst) { ; CHECK-LABEL: vst3_v2f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldmia r0, {s4, s5} -; CHECK-NEXT: vmov r2, s5 +; CHECK-NEXT: vldmia r0, {s0, s1} ; CHECK-NEXT: ldr r0, [r0, #8] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmovx.f16 s12, s4 -; CHECK-NEXT: vmov.16 q0[0], r3 +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vins.f16 s0, s1 ; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q0[2], r0 +; CHECK-NEXT: vmov q3, q0 +; CHECK-NEXT: vmovx.f16 s6, s8 +; CHECK-NEXT: vins.f16 s8, s4 +; CHECK-NEXT: vmov.f32 s13, s8 +; CHECK-NEXT: vmovx.f16 s14, s1 +; CHECK-NEXT: vins.f16 s14, s6 ; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s4, s5 -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmovx.f16 s4, s8 -; CHECK-NEXT: vmov.16 q0[4], r0 -; 
CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: vmov r2, s13 +; CHECK-NEXT: vmov r3, s14 ; CHECK-NEXT: stm r1!, {r0, r2, r3} ; CHECK-NEXT: bx lr entry: @@ -1463,42 +1444,28 @@ ; CHECK-NEXT: ldrd r3, r2, [r0, #8] ; CHECK-NEXT: ldrd r4, r0, [r0, #16] ; CHECK-NEXT: vmov q0[2], q0[0], lr, r3 -; CHECK-NEXT: vmov q0[3], q0[1], r12, r2 ; CHECK-NEXT: vmov.32 q1[0], r4 -; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov q0[3], q0[1], r12, r2 +; CHECK-NEXT: vmovx.f16 s8, s0 ; CHECK-NEXT: vmov.32 q1[1], r0 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmov.16 q2[0], r3 -; CHECK-NEXT: vmov.16 q2[1], r2 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmovx.f16 s12, s0 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s2 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vins.f16 s0, s2 ; CHECK-NEXT: vmovx.f16 s12, s4 -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s0, s3 -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov r2, s5 +; CHECK-NEXT: vins.f16 s4, s8 +; CHECK-NEXT: vmov q2, q0 +; CHECK-NEXT: vmov.f32 s9, s4 +; CHECK-NEXT: vmovx.f16 s10, s2 +; CHECK-NEXT: vins.f16 s10, s12 +; CHECK-NEXT: vmovx.f16 s12, s1 +; CHECK-NEXT: vins.f16 s1, s3 +; CHECK-NEXT: vmov.f32 s11, s1 ; CHECK-NEXT: vstrw.32 q2, [r1] -; CHECK-NEXT: vmovx.f16 s8, s1 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmovx.f16 s0, s5 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s9 -; CHECK-NEXT: vmov r2, s8 +; CHECK-NEXT: vmovx.f16 s8, s5 +; CHECK-NEXT: vins.f16 s5, s12 +; CHECK-NEXT: vmov.f32 s4, s5 +; CHECK-NEXT: vmovx.f16 s5, s3 +; 
CHECK-NEXT: vins.f16 s5, s8 +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: vmov r0, s5 ; CHECK-NEXT: strd r2, r0, [r1, #16] ; CHECK-NEXT: pop {r4, pc} entry: @@ -1518,100 +1485,65 @@ define void @vst3_v8f16(<8 x half> *%src, <24 x half> *%dst) { ; CHECK-LABEL: vst3_v8f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vldrw.u32 q2, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vldrw.u32 q5, [r0, #32] -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vmovx.f16 s12, s4 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.16 q0[0], r3 -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: vldrw.u32 q2, [r0, #16] +; CHECK-NEXT: vldrw.u32 q3, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0, #32] +; CHECK-NEXT: vmovx.f16 s0, s8 +; CHECK-NEXT: vmovx.f16 s20, s12 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmovx.f16 s24, s6 +; CHECK-NEXT: vmov.f64 d0, d6 +; CHECK-NEXT: vins.f16 s0, s8 +; CHECK-NEXT: vmov r0, s4 ; CHECK-NEXT: vmov.16 q0[4], r2 ; CHECK-NEXT: vdup.32 q4, r0 -; CHECK-NEXT: vmov r2, s9 -; CHECK-NEXT: vmov.16 q0[6], r2 -; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: vmovx.f16 s12, s8 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: vmov.16 q0[7], r2 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.f32 s1, s8 -; CHECK-NEXT: vmov.16 q3[3], r2 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmovx.f16 s16, s18 -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmovx.f16 s16, s22 -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmovx.f16 s16, s6 +; CHECK-NEXT: vmov.f32 s3, s13 +; CHECK-NEXT: vins.f16 s17, s20 +; CHECK-NEXT: vins.f16 s3, s9 +; CHECK-NEXT: vmovx.f16 s20, s18 +; CHECK-NEXT: vmov.f32 
s1, s12 +; CHECK-NEXT: vins.f16 s2, s20 +; CHECK-NEXT: vmovx.f16 s20, s10 +; CHECK-NEXT: vins.f16 s20, s24 +; CHECK-NEXT: vmov r0, s11 +; CHECK-NEXT: vmov.16 q5[3], r0 ; CHECK-NEXT: vmovx.f16 s24, s7 -; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q4[0], r2 -; CHECK-NEXT: vmov r2, s11 -; CHECK-NEXT: vmov.16 q4[1], r0 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov.16 q4[3], r0 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: vmovx.f16 s24, s23 -; CHECK-NEXT: vmov.16 q4[6], r0 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: vdup.32 q7, r2 -; CHECK-NEXT: vmov.16 q4[7], r0 -; CHECK-NEXT: vmov r2, s29 -; CHECK-NEXT: vmov.f32 s18, s23 -; CHECK-NEXT: vmovx.f16 s24, s17 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: vmov.16 q6[2], r2 -; CHECK-NEXT: vmov.16 q6[3], r0 -; CHECK-NEXT: vmovx.f16 s28, s30 -; CHECK-NEXT: vmovx.f16 s4, s10 -; CHECK-NEXT: vmov.f32 s1, s13 -; CHECK-NEXT: vmov.f32 s2, s14 +; CHECK-NEXT: vmovx.f16 s23, s11 +; CHECK-NEXT: vmov r0, s15 +; CHECK-NEXT: vins.f16 s23, s24 +; CHECK-NEXT: vdup.32 q6, r0 +; CHECK-NEXT: vmov.f32 s22, s7 +; CHECK-NEXT: vmovx.f16 s28, s21 +; CHECK-NEXT: vins.f16 s25, s28 +; CHECK-NEXT: vmovx.f16 s28, s26 +; CHECK-NEXT: vins.f16 s22, s28 +; CHECK-NEXT: vmovx.f16 s28, s13 +; CHECK-NEXT: vmov.f32 s4, s5 +; CHECK-NEXT: vrev32.16 q2, q2 +; CHECK-NEXT: vins.f16 s4, s28 +; CHECK-NEXT: vmovx.f16 s28, s14 +; CHECK-NEXT: vins.f16 s6, s28 +; CHECK-NEXT: vmov.f32 s18, s2 +; CHECK-NEXT: vmov.f32 s7, s6 +; CHECK-NEXT: vmov.f32 s6, s14 +; CHECK-NEXT: vmovx.f16 s12, s5 +; CHECK-NEXT: vins.f16 s9, s12 +; CHECK-NEXT: vmovx.f16 s12, s10 +; CHECK-NEXT: vins.f16 s6, s12 +; CHECK-NEXT: vmov.f32 s26, s22 +; CHECK-NEXT: vmov.f32 s10, s6 +; CHECK-NEXT: vmov.f32 s1, s17 +; CHECK-NEXT: vmov.f32 s21, s25 +; CHECK-NEXT: vmov.f32 s5, s9 +; CHECK-NEXT: vmov.f32 s2, s18 ; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vmov.16 q6[4], r0 -; CHECK-NEXT: vmov r0, s28 -; CHECK-NEXT: vmov.16 q6[5], r0 -; 
CHECK-NEXT: vmovx.f16 s28, s9 -; CHECK-NEXT: vmov r0, s21 -; CHECK-NEXT: vmov r2, s28 -; CHECK-NEXT: vmov.16 q7[0], r0 -; CHECK-NEXT: vmov.16 q7[1], r2 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: vmov.16 q7[6], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q7[7], r0 -; CHECK-NEXT: vmov.f32 s17, s25 -; CHECK-NEXT: vmov.f32 s29, s21 -; CHECK-NEXT: vmov.f32 s30, s10 -; CHECK-NEXT: vmovx.f16 s4, s29 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s18, s26 -; CHECK-NEXT: vrev32.16 q2, q1 -; CHECK-NEXT: vstrw.32 q4, [r1, #32] -; CHECK-NEXT: vmov r2, s9 -; CHECK-NEXT: vmovx.f16 s8, s10 -; CHECK-NEXT: vmov.16 q1[2], r2 -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov r0, s30 -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.f32 s29, s5 -; CHECK-NEXT: vmov.f32 s30, s6 -; CHECK-NEXT: vstrw.32 q7, [r1, #16] -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vmov.f32 s22, s26 +; CHECK-NEXT: vmov.f32 s6, s10 +; CHECK-NEXT: vstrw.32 q5, [r1, #32] +; CHECK-NEXT: vstrw.32 q1, [r1, #16] +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: bx lr entry: %s1 = getelementptr <8 x half>, <8 x half>* %src, i32 0 @@ -1634,200 +1566,141 @@ ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: .pad #144 ; CHECK-NEXT: sub sp, #144 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov q3, q0 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmov.16 q1[0], r3 -; CHECK-NEXT: vmovx.f16 s0, s8 -; CHECK-NEXT: vmov.16 q1[1], r2 -; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] +; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: vmovx.f16 s0, s12 -; CHECK-NEXT: vmov.16 q1[4], r2 -; CHECK-NEXT: vmov r2, s13 -; CHECK-NEXT: vmov.16 q1[6], r2 -; CHECK-NEXT: vmov r2, s9 -; CHECK-NEXT: vmov.16 q1[7], r2 -; CHECK-NEXT: vmov r2, s0 
-; CHECK-NEXT: vldrw.u32 q0, [r0, #64] -; CHECK-NEXT: vmov.f32 s5, s12 -; CHECK-NEXT: vmov q5, q3 -; CHECK-NEXT: vstrw.32 q3, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: vstrw.32 q1, [sp, #128] @ 16-byte Spill -; CHECK-NEXT: vmov r3, s1 -; CHECK-NEXT: vmovx.f16 s0, s2 -; CHECK-NEXT: vmov.16 q3[2], r3 -; CHECK-NEXT: vstrw.32 q2, [sp, #80] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q3[3], r2 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov.16 q3[4], r2 +; CHECK-NEXT: vmov q6, q2 ; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vstrw.32 q2, [sp, #48] @ 16-byte Spill +; CHECK-NEXT: vmov.f64 d0, d4 +; CHECK-NEXT: vmovx.f16 s20, s14 +; CHECK-NEXT: vstrw.32 q3, [sp, #80] @ 16-byte Spill +; CHECK-NEXT: vins.f16 s0, s12 +; CHECK-NEXT: vmov.16 q0[4], r2 +; CHECK-NEXT: vmov.f32 s3, s9 +; CHECK-NEXT: vins.f16 s3, s13 +; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmovx.f16 s0, s8 +; CHECK-NEXT: vmov.f32 s5, s8 +; CHECK-NEXT: vldrw.u32 q2, [r0, #64] +; CHECK-NEXT: vmov r2, s8 +; CHECK-NEXT: vdup.32 q4, r2 +; CHECK-NEXT: vmov r2, s15 +; CHECK-NEXT: vins.f16 s17, s0 ; CHECK-NEXT: vmovx.f16 s0, s18 -; CHECK-NEXT: vmov.16 q3[5], r2 -; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vins.f16 s6, s0 ; CHECK-NEXT: vmovx.f16 s0, s10 -; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vins.f16 s20, s0 ; CHECK-NEXT: vmovx.f16 s0, s11 -; CHECK-NEXT: vmov.16 q1[0], r3 -; CHECK-NEXT: vmov r3, s23 -; CHECK-NEXT: vmov.16 q1[1], r2 -; CHECK-NEXT: vmov r2, s11 -; CHECK-NEXT: vmov.16 q1[3], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s19 -; CHECK-NEXT: vmov.16 q1[6], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vldrw.u32 q5, [r0, #16] -; CHECK-NEXT: vmov.16 q1[7], r2 -; CHECK-NEXT: vstrw.32 q3, [sp, #112] @ 16-byte Spill -; CHECK-NEXT: vmov.f32 s6, s19 -; CHECK-NEXT: vmovx.f16 s0, s5 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: vmov r3, s1 -; CHECK-NEXT: vmovx.f16 s0, s2 -; CHECK-NEXT: vmov.16 q2[2], r3 -; 
CHECK-NEXT: vmov r3, s20 -; CHECK-NEXT: vmov.16 q2[3], r2 -; CHECK-NEXT: vmov.16 q7[0], r3 -; CHECK-NEXT: vldrw.u32 q3, [r0, #80] +; CHECK-NEXT: vmov.16 q5[3], r2 +; CHECK-NEXT: vmov r2, s27 +; CHECK-NEXT: vmovx.f16 s23, s15 +; CHECK-NEXT: vmov.f32 s18, s6 +; CHECK-NEXT: vins.f16 s23, s0 +; CHECK-NEXT: vstrw.32 q1, [sp, #112] @ 16-byte Spill +; CHECK-NEXT: vmov.f32 s22, s11 +; CHECK-NEXT: vmovx.f16 s0, s21 +; CHECK-NEXT: vdup.32 q1, r2 +; CHECK-NEXT: vldrw.u32 q3, [r0, #48] +; CHECK-NEXT: vins.f16 s5, s0 +; CHECK-NEXT: vmovx.f16 s0, s6 +; CHECK-NEXT: vins.f16 s22, s0 +; CHECK-NEXT: vmovx.f16 s0, s12 +; CHECK-NEXT: vmov.f32 s6, s22 +; CHECK-NEXT: vldrw.u32 q6, [r0, #80] ; CHECK-NEXT: vstrw.32 q1, [sp, #96] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q4, [sp, #48] @ 16-byte Spill -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov.16 q2[4], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.16 q2[5], r2 -; CHECK-NEXT: vstrw.32 q2, [sp, #64] @ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q2, [r0, #48] -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vmovx.f16 s0, s8 -; CHECK-NEXT: vmov.16 q7[1], r2 +; CHECK-NEXT: vldrw.u32 q1, [r0, #16] ; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vstrw.32 q4, [sp, #128] @ 16-byte Spill +; CHECK-NEXT: vmov.f64 d14, d2 +; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill +; CHECK-NEXT: vmovx.f16 s0, s4 +; CHECK-NEXT: vins.f16 s28, s12 +; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: vmov.16 q7[4], r2 -; CHECK-NEXT: vmov r2, s21 -; CHECK-NEXT: vmov.16 q7[6], r2 -; CHECK-NEXT: vmov r2, s9 -; CHECK-NEXT: vmovx.f16 s0, s20 -; CHECK-NEXT: vmov.16 q7[7], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vdup.32 q0, r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmovx.f16 s0, s2 -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.f32 s29, s20 -; CHECK-NEXT: vmov.16 q1[3], r2 -; CHECK-NEXT: vmov r0, s30 -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmovx.f16 s0, s14 -; CHECK-NEXT: vstrw.32 q1, [sp, 
#16] @ 16-byte Spill -; CHECK-NEXT: vmov q1, q2 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov.f32 s8, s9 +; CHECK-NEXT: vmov.f32 s31, s5 +; CHECK-NEXT: vins.f16 s31, s13 +; CHECK-NEXT: vmov.f32 s29, s4 +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vins.f16 s5, s0 ; CHECK-NEXT: vmovx.f16 s0, s6 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s7 -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmov r2, s23 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmovx.f16 s0, s15 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov.f32 s10, s15 -; CHECK-NEXT: vmovx.f16 s0, s9 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vdup.32 q0, r2 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: vmovx.f16 s0, s2 -; CHECK-NEXT: vmov.16 q6[2], r2 -; CHECK-NEXT: vmov.16 q6[3], r0 -; CHECK-NEXT: vmov r0, s10 -; CHECK-NEXT: vmov.16 q6[4], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q6[5], r0 -; CHECK-NEXT: vmov r0, s13 -; CHECK-NEXT: vmovx.f16 s0, s21 -; CHECK-NEXT: vmov.16 q4[0], r0 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s22 -; CHECK-NEXT: vmov.16 q4[1], r2 -; CHECK-NEXT: vmov r0, s14 -; CHECK-NEXT: vmov.16 q4[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q4[7], r0 -; CHECK-NEXT: vmov.f32 s9, s25 -; CHECK-NEXT: vmov.f32 s17, s13 -; CHECK-NEXT: vldrw.u32 q3, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s18, s22 -; CHECK-NEXT: vmovx.f16 s0, s17 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vrev32.16 q0, q1 -; CHECK-NEXT: vmov r2, s1 -; CHECK-NEXT: vmovx.f16 s0, s2 -; CHECK-NEXT: vmov.16 q1[2], r2 -; CHECK-NEXT: vmov.f32 s10, s26 +; CHECK-NEXT: vins.f16 s30, s0 +; CHECK-NEXT: vmov q0, q3 +; CHECK-NEXT: vmov.f32 s6, s30 +; CHECK-NEXT: vmovx.f16 s12, s26 +; CHECK-NEXT: vstrw.32 q1, [sp, #64] @ 16-byte Spill +; CHECK-NEXT: vmovx.f16 s4, s2 +; CHECK-NEXT: vins.f16 s4, s12 +; CHECK-NEXT: vmov r0, s3 ; CHECK-NEXT: vmov.16 
q1[3], r0 -; CHECK-NEXT: vldrw.u32 q6, [sp, #112] @ 16-byte Reload -; CHECK-NEXT: vstrw.32 q2, [r1, #80] -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vmov.16 q1[4], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmovx.f16 s0, s13 -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s14 -; CHECK-NEXT: vmov r0, s5 -; CHECK-NEXT: vmov.16 q5[0], r0 -; CHECK-NEXT: vmov r0, s6 -; CHECK-NEXT: vmov.16 q5[1], r2 -; CHECK-NEXT: vmov.16 q5[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q5[7], r0 -; CHECK-NEXT: vmov.f32 s21, s5 -; CHECK-NEXT: vldrw.u32 q1, [sp, #64] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s22, s14 -; CHECK-NEXT: vmovx.f16 s0, s21 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vmovx.f16 s7, s3 +; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload +; CHECK-NEXT: vmovx.f16 s12, s27 +; CHECK-NEXT: vins.f16 s7, s12 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov.f32 s6, s27 +; CHECK-NEXT: vmovx.f16 s16, s5 +; CHECK-NEXT: vdup.32 q3, r0 +; CHECK-NEXT: vmov.f32 s24, s25 +; CHECK-NEXT: vins.f16 s13, s16 +; CHECK-NEXT: vmovx.f16 s16, s14 +; CHECK-NEXT: vins.f16 s6, s16 +; CHECK-NEXT: vmovx.f16 s16, s1 +; CHECK-NEXT: vins.f16 s24, s16 +; CHECK-NEXT: vmovx.f16 s16, s2 +; CHECK-NEXT: vins.f16 s26, s16 +; CHECK-NEXT: vmov.f32 s14, s6 +; CHECK-NEXT: vmov.f32 s27, s26 +; CHECK-NEXT: vstrw.32 q3, [sp, #32] @ 16-byte Spill +; CHECK-NEXT: vmov.f32 s26, s2 +; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vmovx.f16 s16, s25 +; CHECK-NEXT: vldrw.u32 q3, [sp, #48] @ 16-byte Reload +; CHECK-NEXT: vrev32.16 q0, q0 +; CHECK-NEXT: vins.f16 s1, s16 +; CHECK-NEXT: vmovx.f16 s16, s2 +; CHECK-NEXT: vins.f16 s26, s16 +; CHECK-NEXT: vmovx.f16 s16, s13 +; CHECK-NEXT: vmov.f32 s2, s26 +; CHECK-NEXT: vins.f16 s8, s16 +; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: 
vldrw.u32 q0, [sp, #80] @ 16-byte Reload -; CHECK-NEXT: vrev32.16 q3, q0 -; CHECK-NEXT: vmov r2, s13 -; CHECK-NEXT: vmovx.f16 s12, s14 -; CHECK-NEXT: vmov.16 q0[2], r2 -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vldrw.u32 q3, [sp, #128] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.f32 s13, s25 -; CHECK-NEXT: vmov.f32 s14, s26 -; CHECK-NEXT: vldrw.u32 q6, [sp, #96] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s25, s5 +; CHECK-NEXT: vmovx.f16 s16, s14 +; CHECK-NEXT: vins.f16 s10, s16 +; CHECK-NEXT: vrev32.16 q4, q0 +; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s11, s10 +; CHECK-NEXT: vmov.f32 s10, s14 +; CHECK-NEXT: vmovx.f16 s12, s9 +; CHECK-NEXT: vmov.f32 s5, s1 +; CHECK-NEXT: vins.f16 s17, s12 +; CHECK-NEXT: vmov.f32 s6, s2 +; CHECK-NEXT: vmovx.f16 s12, s18 +; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload +; CHECK-NEXT: vins.f16 s10, s12 +; CHECK-NEXT: vldrw.u32 q3, [sp, #112] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s18, s10 +; CHECK-NEXT: vmov.f32 s13, s1 +; CHECK-NEXT: vstrw.32 q1, [r1, #80] +; CHECK-NEXT: vmov.f32 s14, s2 +; CHECK-NEXT: vldrw.u32 q0, [sp, #96] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s9, s17 ; CHECK-NEXT: vstrw.32 q3, [r1] ; CHECK-NEXT: vmov.f32 s21, s1 -; CHECK-NEXT: vmov.f32 s26, s6 -; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s22, s2 -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s29, s5 -; CHECK-NEXT: vstrw.32 q6, [r1, #32] -; CHECK-NEXT: vmov.f32 s17, s1 -; CHECK-NEXT: vstrw.32 q5, [r1, #16] -; CHECK-NEXT: vmov.f32 s30, s6 -; CHECK-NEXT: vmov.f32 s18, s2 +; CHECK-NEXT: vldrw.u32 q0, [sp, #64] @ 16-byte Reload +; CHECK-NEXT: vstrw.32 q5, [r1, #32] +; CHECK-NEXT: vmov.f32 s29, s1 +; CHECK-NEXT: vmov.f32 s30, s2 +; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s10, s18 ; CHECK-NEXT: vstrw.32 q7, 
[r1, #48] -; CHECK-NEXT: vstrw.32 q4, [r1, #64] +; CHECK-NEXT: vmov.f32 s25, s1 +; CHECK-NEXT: vstrw.32 q2, [r1, #16] +; CHECK-NEXT: vmov.f32 s26, s2 +; CHECK-NEXT: vstrw.32 q6, [r1, #64] ; CHECK-NEXT: add sp, #144 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr Index: llvm/test/CodeGen/Thumb2/mve-vst4.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vst4.ll +++ llvm/test/CodeGen/Thumb2/mve-vst4.ll @@ -422,76 +422,64 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vldrw.u32 q3, [r0, #32] -; CHECK-NEXT: vldrw.u32 q2, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vmov.u16 r0, q3[2] -; CHECK-NEXT: vmov.u16 r2, q2[2] -; CHECK-NEXT: vmov.16 q4[2], r0 -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmov.u16 r2, q1[2] -; CHECK-NEXT: vmov.16 q4[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[3] -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov.16 q4[6], r0 -; CHECK-NEXT: vmov.u16 r2, q2[3] -; CHECK-NEXT: vmov.16 q4[7], r0 -; CHECK-NEXT: vmov.16 q0[4], r2 +; CHECK-NEXT: vldrw.u32 q4, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0, #32] +; CHECK-NEXT: vldrw.u32 q2, [r0, #16] +; CHECK-NEXT: vmov.f64 d0, d8 +; CHECK-NEXT: vmov.u16 r0, q4[1] +; CHECK-NEXT: vmov.f32 s21, s5 +; CHECK-NEXT: vins.f16 s0, s8 +; CHECK-NEXT: vmov.16 q0[4], r0 +; CHECK-NEXT: vins.f16 s21, s21 ; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov.u16 r0, q2[0] -; CHECK-NEXT: vmov.f32 s1, s17 -; CHECK-NEXT: vmov.f32 s3, s19 -; CHECK-NEXT: vmov.16 q4[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vstrb.8 q0, [r1, #16] -; CHECK-NEXT: vmov.16 q4[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[1] -; CHECK-NEXT: vmov.16 q4[4], r0 -; CHECK-NEXT: vmov.u16 r0, q3[0] -; CHECK-NEXT: vmov.16 q5[2], r0 -; CHECK-NEXT: vmov.16 q5[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[1] +; CHECK-NEXT: vmov.f32 s12, s17 ; 
CHECK-NEXT: vmov.16 q5[6], r0 +; CHECK-NEXT: vins.f16 s12, s9 ; CHECK-NEXT: vmov.16 q5[7], r0 +; CHECK-NEXT: vmov.u16 r0, q4[3] +; CHECK-NEXT: vmov.f32 s25, s4 +; CHECK-NEXT: vmov.16 q3[4], r0 +; CHECK-NEXT: vmov.u16 r0, q2[3] +; CHECK-NEXT: vins.f16 s25, s25 +; CHECK-NEXT: vmov.16 q3[5], r0 ; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.16 q4[5], r0 -; CHECK-NEXT: vmov.u16 r0, q2[6] -; CHECK-NEXT: vmov.f32 s17, s21 -; CHECK-NEXT: vmov.f32 s19, s23 -; CHECK-NEXT: vmov.16 q5[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vstrb.8 q4, [r1] -; CHECK-NEXT: vmov.16 q5[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[7] -; CHECK-NEXT: vmov.16 q5[4], r0 -; CHECK-NEXT: vmov.u16 r0, q3[6] -; CHECK-NEXT: vmov.16 q6[2], r0 -; CHECK-NEXT: vmov.16 q6[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[7] ; CHECK-NEXT: vmov.16 q6[6], r0 +; CHECK-NEXT: vmov.f32 s13, s21 ; CHECK-NEXT: vmov.16 q6[7], r0 +; CHECK-NEXT: vmov.u16 r0, q2[1] +; CHECK-NEXT: vmov.16 q0[5], r0 +; CHECK-NEXT: vmov.f32 s15, s23 +; CHECK-NEXT: vmov.f32 s20, s19 +; CHECK-NEXT: vmov.u16 r0, q4[7] +; CHECK-NEXT: vmov.f32 s1, s25 +; CHECK-NEXT: vins.f16 s20, s11 +; CHECK-NEXT: vmov.f32 s3, s27 +; CHECK-NEXT: vmov.16 q5[4], r0 +; CHECK-NEXT: vmov.f32 s25, s7 ; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vins.f16 s25, s25 +; CHECK-NEXT: vstrb.8 q3, [r1, #16] +; CHECK-NEXT: vmov.16 q6[6], r0 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: vmov.16 q6[7], r0 +; CHECK-NEXT: vmov.u16 r0, q2[7] ; CHECK-NEXT: vmov.16 q5[5], r0 -; CHECK-NEXT: vmov.u16 r0, q2[4] +; CHECK-NEXT: vmov.u16 r0, q4[5] ; CHECK-NEXT: vmov.f32 s21, s25 ; CHECK-NEXT: vmov.f32 s23, s27 -; CHECK-NEXT: vmov.16 q6[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] +; CHECK-NEXT: vmov.f64 d12, d9 ; CHECK-NEXT: vstrb.8 q5, [r1, #48] -; CHECK-NEXT: vmov.16 q6[1], r0 -; CHECK-NEXT: vmov.u16 r0, q2[5] +; CHECK-NEXT: vmov.f32 s17, s6 +; CHECK-NEXT: vins.f16 s24, s10 ; CHECK-NEXT: vmov.16 q6[4], r0 -; CHECK-NEXT: vmov.u16 r0, q3[4] -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: 
vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[5] -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov.16 q2[7], r0 +; CHECK-NEXT: vins.f16 s17, s17 ; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vmov.16 q4[6], r0 +; CHECK-NEXT: vmov.16 q4[7], r0 +; CHECK-NEXT: vmov.u16 r0, q2[5] ; CHECK-NEXT: vmov.16 q6[5], r0 -; CHECK-NEXT: vmov.f32 s25, s9 -; CHECK-NEXT: vmov.f32 s27, s11 +; CHECK-NEXT: vmov.f32 s25, s17 +; CHECK-NEXT: vmov.f32 s27, s19 ; CHECK-NEXT: vstrb.8 q6, [r1, #32] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: bx lr @@ -1057,28 +1045,18 @@ ; CHECK-LABEL: vst4_v2f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldmia r0, {s4, s5} -; CHECK-NEXT: vmov r2, s5 +; CHECK-NEXT: vmovx.f16 s12, s5 ; CHECK-NEXT: ldr r0, [r0, #8] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmovx.f16 s12, s4 -; CHECK-NEXT: vmov.16 q0[0], r3 +; CHECK-NEXT: vmov.f64 d0, d2 ; CHECK-NEXT: vdup.32 q2, r0 -; CHECK-NEXT: vmov.16 q0[1], r2 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov r0, s9 -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s4, s5 -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmovx.f16 s4, s8 -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vins.f16 s0, s5 +; CHECK-NEXT: vmov.f32 s1, s8 +; CHECK-NEXT: vins.f16 s1, s9 +; CHECK-NEXT: vmovx.f16 s2, s4 ; CHECK-NEXT: vmovx.f16 s4, s9 -; CHECK-NEXT: vmov.16 q0[6], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q0[7], r0 +; CHECK-NEXT: vins.f16 s2, s12 +; CHECK-NEXT: vmovx.f16 s3, s8 +; CHECK-NEXT: vins.f16 s3, s4 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1105,52 +1083,32 @@ ; CHECK-NEXT: ldrd lr, r12, [r0] ; CHECK-NEXT: ldrd r3, r2, [r0, #8] ; CHECK-NEXT: ldrd r4, r0, [r0, #16] -; CHECK-NEXT: vmov q1[2], q1[0], lr, r3 -; CHECK-NEXT: vmov q1[3], q1[1], r12, r2 -; CHECK-NEXT: vmov q0[2], q0[0], r4, r4 -; CHECK-NEXT: vmov r3, s5 -; CHECK-NEXT: vmov q0[3], q0[1], r0, r0 -; 
CHECK-NEXT: vmov r2, s7 -; CHECK-NEXT: vmov.16 q2[0], r3 -; CHECK-NEXT: vmov.16 q2[1], r2 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmovx.f16 s12, s5 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s7 -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmovx.f16 s12, s1 -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vmov q0[2], q0[0], lr, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r12, r2 +; CHECK-NEXT: vmov q1[2], q1[0], r4, r4 +; CHECK-NEXT: vmov.f32 s8, s1 +; CHECK-NEXT: vmov q1[3], q1[1], r0, r0 +; CHECK-NEXT: vins.f16 s8, s3 ; CHECK-NEXT: vmovx.f16 s12, s3 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: vmov.f32 s9, s5 +; CHECK-NEXT: vins.f16 s9, s7 +; CHECK-NEXT: vmovx.f16 s10, s1 +; CHECK-NEXT: vins.f16 s10, s12 +; CHECK-NEXT: vmovx.f16 s12, s7 +; CHECK-NEXT: vmovx.f16 s11, s5 +; CHECK-NEXT: vins.f16 s11, s12 ; CHECK-NEXT: vstrh.16 q2, [r1, #16] -; CHECK-NEXT: vmov r0, s6 -; CHECK-NEXT: vmov.16 q2[0], r2 -; CHECK-NEXT: vmovx.f16 s12, s4 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vmovx.f16 s8, s0 +; CHECK-NEXT: vins.f16 s0, s2 +; CHECK-NEXT: vmovx.f16 s3, s4 +; CHECK-NEXT: vins.f16 s4, s6 +; CHECK-NEXT: vmov.f32 s1, s4 +; CHECK-NEXT: vmovx.f16 s10, s2 +; CHECK-NEXT: vins.f16 s8, s10 ; CHECK-NEXT: vmovx.f16 s4, s6 -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmovx.f16 s0, s2 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vstrh.16 q2, [r1] +; CHECK-NEXT: vmov.f32 s2, s8 +; CHECK-NEXT: vins.f16 s3, 
s4 +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: pop {r4, pc} entry: %s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0 @@ -1238,101 +1196,56 @@ define void @vst4_v8f16_align1(<8 x half> *%src, <32 x half> *%dst) { ; CHECK-LABEL: vst4_v8f16_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: vldrw.u32 q7, [r0] -; CHECK-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-NEXT: vldrw.u32 q5, [r0, #32] -; CHECK-NEXT: vmov r3, s30 -; CHECK-NEXT: vmov q2, q0 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmov.16 q1[0], r3 -; CHECK-NEXT: vmovx.f16 s0, s30 -; CHECK-NEXT: vmov.16 q1[1], r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmovx.f16 s0, s21 -; CHECK-NEXT: vmov.16 q1[4], r2 -; CHECK-NEXT: vmov r0, s21 -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.16 q1[3], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[6], r0 -; CHECK-NEXT: vmov r2, s29 -; CHECK-NEXT: vmov.16 q1[7], r0 -; CHECK-NEXT: vmov.16 q3[0], r2 -; CHECK-NEXT: vstrw.32 q1, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vmov q1, q2 -; CHECK-NEXT: vmov r0, s5 -; CHECK-NEXT: vmovx.f16 s0, s29 -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmovx.f16 s0, s5 -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmovx.f16 s0, s20 -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov r0, s20 -; CHECK-NEXT: vmov.16 q4[2], r0 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: vmov.16 q4[3], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q4[6], r0 -; CHECK-NEXT: vmovx.f16 s0, s28 -; CHECK-NEXT: vmov.16 q4[7], r0 -; CHECK-NEXT: vmov r0, s28 -; CHECK-NEXT: vmov.16 q6[0], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q6[1], r2 -; CHECK-NEXT: vmovx.f16 s0, s4 -; CHECK-NEXT: vmov.16 q6[4], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q6[5], r0 -; 
CHECK-NEXT: vmov r0, s23 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmovx.f16 s0, s23 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r2, s31 -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: vmovx.f16 s28, s31 -; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov r0, s28 -; CHECK-NEXT: vmovx.f16 s28, s7 -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov r0, s28 -; CHECK-NEXT: vmovx.f16 s20, s22 -; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: vmov.16 q7[2], r0 -; CHECK-NEXT: vmov.f32 s25, s17 -; CHECK-NEXT: vmov.16 q7[3], r0 -; CHECK-NEXT: vmov r0, s20 -; CHECK-NEXT: vmov.16 q7[6], r0 -; CHECK-NEXT: vmovx.f16 s20, s6 -; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q7[7], r0 -; CHECK-NEXT: vmov r0, s20 -; CHECK-NEXT: vldrw.u32 q5, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q1[5], r0 -; CHECK-NEXT: vmov.f32 s1, s9 -; CHECK-NEXT: vmov.f32 s13, s21 -; CHECK-NEXT: vmov.f32 s5, s29 -; CHECK-NEXT: vmov.f32 s15, s23 -; CHECK-NEXT: vmov.f32 s27, s19 -; CHECK-NEXT: vstrb.8 q3, [r1, #16] -; CHECK-NEXT: vmov.f32 s3, s11 -; CHECK-NEXT: vstrb.8 q6, [r1] -; CHECK-NEXT: vmov.f32 s7, s31 -; CHECK-NEXT: vstrb.8 q0, [r1, #48] -; CHECK-NEXT: vstrb.8 q1, [r1, #32] -; CHECK-NEXT: add sp, #32 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} +; CHECK-NEXT: vldrw.u32 q4, [r0, #16] +; CHECK-NEXT: vldrw.u32 q5, [r0] +; CHECK-NEXT: vmovx.f16 s2, s21 +; CHECK-NEXT: vins.f16 s21, s17 +; CHECK-NEXT: vmov.f32 s0, s21 +; CHECK-NEXT: vmovx.f16 s4, s17 +; CHECK-NEXT: vins.f16 s2, s4 +; CHECK-NEXT: vmovx.f16 s8, s20 +; CHECK-NEXT: vmovx.f16 s4, s16 +; CHECK-NEXT: vins.f16 s20, s16 +; CHECK-NEXT: vins.f16 s8, s4 +; CHECK-NEXT: vmov q1, q5 +; CHECK-NEXT: vmov.f32 s6, s8 +; CHECK-NEXT: vmovx.f16 s10, s23 +; 
CHECK-NEXT: vins.f16 s23, s19 +; CHECK-NEXT: vmovx.f16 s12, s19 +; CHECK-NEXT: vmov.f32 s8, s23 +; CHECK-NEXT: vmovx.f16 s16, s18 +; CHECK-NEXT: vins.f16 s10, s12 +; CHECK-NEXT: vmovx.f16 s14, s22 +; CHECK-NEXT: vins.f16 s22, s18 +; CHECK-NEXT: vmov.f32 s12, s22 +; CHECK-NEXT: vins.f16 s14, s16 +; CHECK-NEXT: vldrw.u32 q4, [r0, #32] +; CHECK-NEXT: vmov.f32 s13, s18 +; CHECK-NEXT: vmovx.f16 s24, s17 +; CHECK-NEXT: vins.f16 s17, s17 +; CHECK-NEXT: vins.f16 s24, s24 +; CHECK-NEXT: vmov q5, q4 +; CHECK-NEXT: vmovx.f16 s7, s16 +; CHECK-NEXT: vmov.f32 s23, s24 +; CHECK-NEXT: vins.f16 s16, s16 +; CHECK-NEXT: vmov.f32 s5, s16 +; CHECK-NEXT: vmovx.f16 s11, s19 +; CHECK-NEXT: vins.f16 s19, s19 +; CHECK-NEXT: vins.f16 s13, s13 +; CHECK-NEXT: vmov.f32 s9, s19 +; CHECK-NEXT: vmovx.f16 s15, s18 +; CHECK-NEXT: vmov.f32 s1, s21 +; CHECK-NEXT: vins.f16 s7, s7 +; CHECK-NEXT: vins.f16 s11, s11 +; CHECK-NEXT: vins.f16 s15, s15 +; CHECK-NEXT: vstrb.8 q2, [r1, #48] +; CHECK-NEXT: vstrb.8 q3, [r1, #32] +; CHECK-NEXT: vmov.f32 s3, s23 +; CHECK-NEXT: vstrb.8 q1, [r1] +; CHECK-NEXT: vstrb.8 q0, [r1, #16] +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} ; CHECK-NEXT: bx lr entry: %s1 = getelementptr <8 x half>, <8 x half>* %src, i32 0