diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -4017,6 +4017,7 @@ def : MVE_unpred_vector_store_typed; def : MVE_unpred_vector_store_typed; def : MVE_unpred_vector_store_typed; + def : MVE_unpred_vector_store_typed; } class MVE_unpred_vector_load_typed; def : MVE_unpred_vector_load_typed; def : MVE_unpred_vector_load_typed; + def : MVE_unpred_vector_load_typed; } let Predicates = [HasMVEInt, IsLE] in { diff --git a/llvm/test/CodeGen/Thumb2/mve-basic.ll b/llvm/test/CodeGen/Thumb2/mve-basic.ll --- a/llvm/test/CodeGen/Thumb2/mve-basic.ll +++ b/llvm/test/CodeGen/Thumb2/mve-basic.ll @@ -29,3 +29,27 @@ store <4 x i32> %result, <4 x i32>* %resultp, align 16 ret void } + +define arm_aapcs_vfpcc <16 x i8> @stack_slot_handling(<16 x i8> %a) #0 { +; CHECK-LABEL: stack_slot_handling: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r4, r6, r7, lr} +; CHECK-NEXT: add r7, sp, #8 +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: bfc r4, #0, #4 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: sub.w r4, r7, #8 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: pop {r4, r6, r7, pc} +entry: + %a.addr = alloca <16 x i8>, align 8 + store <16 x i8> %a, <16 x i8>* %a.addr, align 8 + %0 = load <16 x i8>, <16 x i8>* %a.addr, align 8 + ret <16 x i8> %0 +} + +attributes #0 = { noinline optnone }