Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -396,6 +396,8 @@ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); } + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + // We can do bitwise operations on v2i64 vectors setOperationAction(ISD::AND, MVT::v2i64, Legal); setOperationAction(ISD::OR, MVT::v2i64, Legal); Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1901,6 +1901,8 @@ (VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))), HPR)>; + def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; def : Pat<(v4f32 (scalar_to_vector SPR:$src)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; def : Pat<(v4f32 (scalar_to_vector GPR:$src)), Index: llvm/test/CodeGen/Thumb2/mve-extractstore.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-extractstore.ll +++ llvm/test/CodeGen/Thumb2/mve-extractstore.ll @@ -4,28 +4,15 @@ define half @extret_f16_sf(<8 x half> %a, <8 x half> %b, half* nocapture %p) { ; CHECK-LABEL: extret_f16_sf: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r6, r7, lr} -; CHECK-NEXT: push {r4, r6, r7, lr} -; CHECK-NEXT: .setfp r7, sp, #8 -; CHECK-NEXT: add r7, sp, #8 -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: mov r4, sp -; CHECK-NEXT: bfc r4, #0, #4 -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: strd r0, r1, [sp] -; CHECK-NEXT: add.w r0, r7, #8 -; CHECK-NEXT: mov r1, sp -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: sub.w r4, r7, #8 -; CHECK-NEXT: ldr r0, [r7, #24] -; CHECK-NEXT: vadd.f16 q0, q1, q0 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: ldr r0, [sp, #16] +; CHECK-NEXT: vadd.f16 q0, q0, q1 ; CHECK-NEXT: vmovx.f16 s0, s0 ; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: pop {r4, r6, r7, pc} +; CHECK-NEXT: bx lr %c = fadd <8 x half> %a, %b %e = extractelement <8 x half> %c, i32 1 store half %e, half* %p, align 2 @@ -48,27 +35,14 @@ define float @extret_f32_sf(<4 x float> %a, <4 x float> %b, float* nocapture %p) { ; CHECK-LABEL: extret_f32_sf: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r6, r7, lr} -; CHECK-NEXT: push {r4, r6, r7, lr} -; CHECK-NEXT: .setfp r7, sp, #8 -; CHECK-NEXT: add r7, sp, #8 -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: mov r4, sp -; CHECK-NEXT: bfc r4, #0, #4 -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: strd r0, r1, [sp] -; CHECK-NEXT: add.w r0, r7, #8 -; CHECK-NEXT: mov r1, sp -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: ldr r1, [r7, #24] -; CHECK-NEXT: sub.w r4, r7, #8 -; CHECK-NEXT: vadd.f32 q0, q1, q0 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: ldr r1, [sp, #16] +; CHECK-NEXT: vadd.f32 q0, q0, q1 ; CHECK-NEXT: vmov r0, s1 ; CHECK-NEXT: str r0, [r1] -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: pop {r4, r6, r7, pc} +; CHECK-NEXT: bx lr %c = fadd <4 x float> %a, %b %e = extractelement <4 x float> %c, i32 1 store float %e, float* %p, align 2 Index: llvm/test/CodeGen/Thumb2/mve-shuffle.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -1670,21 +1670,8 @@ define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) { ; CHECK-LABEL: insert_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r6, r7, lr} -; CHECK-NEXT: push {r4, r6, r7, lr} -; CHECK-NEXT: .setfp r7, sp, #8 -; CHECK-NEXT: add r7, sp, #8 -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: mov r4, sp -; CHECK-NEXT: bfc r4, #0, #4 -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: sub.w r4, r7, #8 -; CHECK-NEXT: vstr d0, [sp] -; CHECK-NEXT: mov r0, sp -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: pop {r4, r6, r7, pc} +; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: bx lr entry: %res = insertelement <2 x double> undef, double %a, i32 0 ret <2 x double> %res