diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -1132,11 +1132,15 @@ [llvm_i32_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], llvm_anyvector_ty>; -def int_arm_mve_vld2q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_arm_mve_vld4q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>; - -def int_arm_mve_vst2q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem, IntrArgMemOnly]>; -def int_arm_mve_vst4q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem, IntrArgMemOnly]>; +def int_arm_mve_vld2q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_arm_mve_vld4q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; + +def int_arm_mve_vst2q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly], "", [SDNPMemOperand]>; +def int_arm_mve_vst4q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly], "", [SDNPMemOperand]>; // MVE vector absolute difference and accumulate across vector // The first operand is an 'unsigned' flag. The remaining operands are: diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2764,6 +2764,7 @@ CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); Data = SDValue(LoadInst, 0); Chain = SDValue(LoadInst, 1); + transferMemOperands(N, LoadInst); } // The last may need a writeback on it if (HasWriteback) @@ -2771,6 +2772,7 @@ SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; auto LoadInst = CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops); + transferMemOperands(N, LoadInst); unsigned i; for (i = 0; i < NumVecs; i++) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -6027,8 +6027,8 @@ def SDTARMVST4 : SDTypeProfile<1, 7, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVec<3>, SDTCisSameAs<3, 4>, SDTCisSameAs<3, 5>, SDTCisSameAs<3, 6>, SDTCisVT<7, i32>]>; -def MVEVST2UPD : SDNode<"ARMISD::VST2_UPD", SDTARMVST2, [SDNPHasChain]>; -def MVEVST4UPD : SDNode<"ARMISD::VST4_UPD", SDTARMVST4, [SDNPHasChain]>; +def MVEVST2UPD : SDNode<"ARMISD::VST2_UPD", SDTARMVST2, [SDNPHasChain, SDNPMemOperand]>; +def MVEVST4UPD : SDNode<"ARMISD::VST4_UPD", SDTARMVST4, [SDNPHasChain, SDNPMemOperand]>; multiclass MVE_vst24_patterns { foreach stage = [0,1] in diff --git a/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll b/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll --- a/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll +++ b/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll @@ -19,16 +19,16 @@ ; CHECK-NEXT: vld21.32 {q0, q1}, [r5]! ; CHECK-NEXT: adds r0, #64 ; CHECK-NEXT: vstmia lr, {d0, d1, d2, d3} @ 32-byte Spill -; CHECK-NEXT: add.w lr, sp, #32 ; CHECK-NEXT: vld20.32 {q0, q1}, [r0] +; CHECK-NEXT: add.w lr, sp, #32 ; CHECK-NEXT: vld21.32 {q0, q1}, [r0] ; CHECK-NEXT: add.w r0, r4, #128 ; CHECK-NEXT: vstmia lr, {d0, d1, d2, d3} @ 32-byte Spill ; CHECK-NEXT: vld20.32 {q0, q1}, [r0] ; CHECK-NEXT: vld21.32 {q0, q1}, [r0] ; CHECK-NEXT: add.w r0, r4, #192 -; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3} @ 32-byte Spill ; CHECK-NEXT: vld20.32 {q6, q7}, [r0] +; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3} @ 32-byte Spill ; CHECK-NEXT: vld21.32 {q6, q7}, [r0] ; CHECK-NEXT: add.w r0, r4, #256 ; CHECK-NEXT: vld20.32 {q4, q5}, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-vld2.ll b/llvm/test/CodeGen/Thumb2/mve-vld2.ll --- a/llvm/test/CodeGen/Thumb2/mve-vld2.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vld2.ll @@ -71,22 +71,22 @@ ; CHECK-NEXT: vld20.32 {q0, q1}, [r0] ; CHECK-NEXT: add.w r2, r0, #96 ; CHECK-NEXT: add.w r3, r0, #64 +; CHECK-NEXT: vld20.32 {q3, q4}, [r2] ; CHECK-NEXT: vld21.32 {q0, q1}, [r0]! +; CHECK-NEXT: vld21.32 {q3, q4}, [r2] +; CHECK-NEXT: vld20.32 {q5, q6}, [r0] ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: vld20.32 {q1, q2}, [r3] -; CHECK-NEXT: vld20.32 {q3, q4}, [r2] -; CHECK-NEXT: vld20.32 {q5, q6}, [r0] +; CHECK-NEXT: vadd.i32 q3, q3, q4 ; CHECK-NEXT: vld21.32 {q5, q6}, [r0] ; CHECK-NEXT: vld21.32 {q1, q2}, [r3] -; CHECK-NEXT: vld21.32 {q3, q4}, [r2] +; CHECK-NEXT: vstrw.32 q3, [r1, #48] ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q1_q2 -; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vadd.i32 q5, q5, q6 +; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vadd.i32 q1, q1, q2 -; CHECK-NEXT: vadd.i32 q3, q3, q4 -; CHECK-NEXT: vstrw.32 q1, [r1, #32] -; CHECK-NEXT: vstrw.32 q3, [r1, #48] ; CHECK-NEXT: vstrw.32 q5, [r1, #16] +; CHECK-NEXT: vstrw.32 q1, [r1, #32] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: bx lr entry: @@ -473,22 +473,22 @@ ; CHECK-NEXT: vld20.32 {q0, q1}, [r0] ; CHECK-NEXT: add.w r2, r0, #96 ; CHECK-NEXT: add.w r3, r0, #64 +; CHECK-NEXT: vld20.32 {q3, q4}, [r2] ; CHECK-NEXT: vld21.32 {q0, q1}, [r0]! +; CHECK-NEXT: vld21.32 {q3, q4}, [r2] +; CHECK-NEXT: vld20.32 {q5, q6}, [r0] ; CHECK-NEXT: vadd.f32 q0, q0, q1 ; CHECK-NEXT: vld20.32 {q1, q2}, [r3] -; CHECK-NEXT: vld20.32 {q3, q4}, [r2] -; CHECK-NEXT: vld20.32 {q5, q6}, [r0] +; CHECK-NEXT: vadd.f32 q3, q3, q4 ; CHECK-NEXT: vld21.32 {q5, q6}, [r0] ; CHECK-NEXT: vld21.32 {q1, q2}, [r3] -; CHECK-NEXT: vld21.32 {q3, q4}, [r2] +; CHECK-NEXT: vstrw.32 q3, [r1, #48] ; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q1_q2 -; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vadd.f32 q5, q5, q6 +; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vadd.f32 q1, q1, q2 -; CHECK-NEXT: vadd.f32 q3, q3, q4 -; CHECK-NEXT: vstrw.32 q1, [r1, #32] -; CHECK-NEXT: vstrw.32 q3, [r1, #48] ; CHECK-NEXT: vstrw.32 q5, [r1, #16] +; CHECK-NEXT: vstrw.32 q1, [r1, #32] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-vld4.ll b/llvm/test/CodeGen/Thumb2/mve-vld4.ll --- a/llvm/test/CodeGen/Thumb2/mve-vld4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vld4.ll @@ -128,19 +128,19 @@ ; CHECK-NEXT: vadd.i32 q4, q2, q3 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: vstrw.32 q4, [sp, #112] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q0, [sp, #96] @ 16-byte Spill ; CHECK-NEXT: vld40.32 {q1, q2, q3, q4}, [r3] +; CHECK-NEXT: vstrw.32 q0, [sp, #96] @ 16-byte Spill +; CHECK-NEXT: vldrw.u32 q6, [sp, #112] @ 16-byte Reload ; CHECK-NEXT: vld41.32 {q1, q2, q3, q4}, [r3] +; CHECK-NEXT: vldrw.u32 q5, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vld42.32 {q1, q2, q3, q4}, [r3] +; CHECK-NEXT: vadd.i32 q6, q5, q6 +; CHECK-NEXT: vstrw.32 q6, [sp, #112] @ 16-byte Spill ; CHECK-NEXT: vld43.32 {q1, q2, q3, q4}, [r3] -; CHECK-NEXT: vldrw.u32 q6, [sp, #112] @ 16-byte Reload -; CHECK-NEXT: vldrw.u32 q5, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vstrw.32 q4, [sp, #80] @ 16-byte Spill ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: vadd.i32 q6, q5, q6 ; CHECK-NEXT: vldrw.u32 q5, [sp, #80] @ 16-byte Reload ; CHECK-NEXT: vadd.i32 q0, q0, q2 -; CHECK-NEXT: vstrw.32 q6, [sp, #112] @ 16-byte Spill ; CHECK-NEXT: vadd.i32 q1, q3, q5 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: vstrw.32 q0, [sp, #96] @ 16-byte Spill @@ -927,19 +927,19 @@ ; CHECK-NEXT: vadd.f32 q4, q2, q3 ; CHECK-NEXT: vadd.f32 q0, q0, q1 ; CHECK-NEXT: vstrw.32 q4, [sp, #112] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q0, [sp, #96] @ 16-byte Spill ; CHECK-NEXT: vld40.32 {q1, q2, q3, q4}, [r3] +; CHECK-NEXT: vstrw.32 q0, [sp, #96] @ 16-byte Spill +; CHECK-NEXT: vldrw.u32 q6, [sp, #112] @ 16-byte Reload ; CHECK-NEXT: vld41.32 {q1, q2, q3, q4}, [r3] +; CHECK-NEXT: vldrw.u32 q5, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vld42.32 {q1, q2, q3, q4}, [r3] +; CHECK-NEXT: vadd.f32 q6, q5, q6 +; CHECK-NEXT: vstrw.32 q6, [sp, #112] @ 16-byte Spill ; CHECK-NEXT: vld43.32 {q1, q2, q3, q4}, [r3] -; CHECK-NEXT: vldrw.u32 q6, [sp, #112] @ 16-byte Reload -; CHECK-NEXT: vldrw.u32 q5, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vstrw.32 q4, [sp, #80] @ 16-byte Spill ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: vadd.f32 q6, q5, q6 ; CHECK-NEXT: vldrw.u32 q5, [sp, #80] @ 16-byte Reload ; CHECK-NEXT: vadd.f32 q0, q0, q2 -; CHECK-NEXT: vstrw.32 q6, [sp, #112] @ 16-byte Spill ; CHECK-NEXT: vadd.f32 q1, q3, q5 ; CHECK-NEXT: vadd.f32 q0, q0, q1 ; CHECK-NEXT: vstrw.32 q0, [sp, #96] @ 16-byte Spill @@ -1153,8 +1153,8 @@ ; CHECK-NEXT: vld41.16 {q0, q1, q2, q3}, [r0] ; CHECK-NEXT: vld42.16 {q0, q1, q2, q3}, [r0] ; CHECK-NEXT: vld43.16 {q0, q1, q2, q3}, [r0]! -; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill ; CHECK-NEXT: vld40.16 {q4, q5, q6, q7}, [r0] +; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill ; CHECK-NEXT: vld41.16 {q4, q5, q6, q7}, [r0] ; CHECK-NEXT: vld42.16 {q4, q5, q6, q7}, [r0] ; CHECK-NEXT: vld43.16 {q4, q5, q6, q7}, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-vst2.ll b/llvm/test/CodeGen/Thumb2/mve-vst2.ll --- a/llvm/test/CodeGen/Thumb2/mve-vst2.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst2.ll @@ -75,15 +75,15 @@ ; CHECK-NEXT: vldrw.u32 q0, [r0, #48] ; CHECK-NEXT: vldrw.u32 q2, [r0, #32] ; CHECK-NEXT: vldrw.u32 q4, [r0, #16] -; CHECK-NEXT: vst20.32 {q6, q7}, [r1] ; CHECK-NEXT: add.w r0, r1, #96 ; CHECK-NEXT: add.w r2, r1, #64 +; CHECK-NEXT: vst20.32 {q6, q7}, [r1] ; CHECK-NEXT: vst21.32 {q6, q7}, [r1]! ; CHECK-NEXT: vst20.32 {q4, q5}, [r1] -; CHECK-NEXT: vst21.32 {q4, q5}, [r1] ; CHECK-NEXT: vst20.32 {q2, q3}, [r2] -; CHECK-NEXT: vst21.32 {q2, q3}, [r2] ; CHECK-NEXT: vst20.32 {q0, q1}, [r0] +; CHECK-NEXT: vst21.32 {q4, q5}, [r1] +; CHECK-NEXT: vst21.32 {q2, q3}, [r2] ; CHECK-NEXT: vst21.32 {q0, q1}, [r0] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr @@ -460,15 +460,15 @@ ; CHECK-NEXT: vldrw.u32 q0, [r0, #48] ; CHECK-NEXT: vldrw.u32 q2, [r0, #32] ; CHECK-NEXT: vldrw.u32 q4, [r0, #16] -; CHECK-NEXT: vst20.32 {q6, q7}, [r1] ; CHECK-NEXT: add.w r0, r1, #96 ; CHECK-NEXT: add.w r2, r1, #64 +; CHECK-NEXT: vst20.32 {q6, q7}, [r1] ; CHECK-NEXT: vst21.32 {q6, q7}, [r1]! ; CHECK-NEXT: vst20.32 {q4, q5}, [r1] -; CHECK-NEXT: vst21.32 {q4, q5}, [r1] ; CHECK-NEXT: vst20.32 {q2, q3}, [r2] -; CHECK-NEXT: vst21.32 {q2, q3}, [r2] ; CHECK-NEXT: vst20.32 {q0, q1}, [r0] +; CHECK-NEXT: vst21.32 {q4, q5}, [r1] +; CHECK-NEXT: vst21.32 {q2, q3}, [r2] ; CHECK-NEXT: vst21.32 {q0, q1}, [r0] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll --- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll @@ -175,10 +175,10 @@ ; CHECK-NEXT: vst40.32 {q4, q5, q6, q7}, [r1] ; CHECK-NEXT: vst41.32 {q4, q5, q6, q7}, [r1] ; CHECK-NEXT: vst42.32 {q4, q5, q6, q7}, [r1] -; CHECK-NEXT: vst43.32 {q4, q5, q6, q7}, [r1] ; CHECK-NEXT: vst40.32 {q0, q1, q2, q3}, [r0] ; CHECK-NEXT: vst41.32 {q0, q1, q2, q3}, [r0] ; CHECK-NEXT: vst42.32 {q0, q1, q2, q3}, [r0] +; CHECK-NEXT: vst43.32 {q4, q5, q6, q7}, [r1] ; CHECK-NEXT: vst43.32 {q0, q1, q2, q3}, [r0] ; CHECK-NEXT: add sp, #216 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} @@ -945,10 +945,10 @@ ; CHECK-NEXT: vst40.32 {q4, q5, q6, q7}, [r1] ; CHECK-NEXT: vst41.32 {q4, q5, q6, q7}, [r1] ; CHECK-NEXT: vst42.32 {q4, q5, q6, q7}, [r1] -; CHECK-NEXT: vst43.32 {q4, q5, q6, q7}, [r1] ; CHECK-NEXT: vst40.32 {q0, q1, q2, q3}, [r0] ; CHECK-NEXT: vst41.32 {q0, q1, q2, q3}, [r0] ; CHECK-NEXT: vst42.32 {q0, q1, q2, q3}, [r0] +; CHECK-NEXT: vst43.32 {q4, q5, q6, q7}, [r1] ; CHECK-NEXT: vst43.32 {q0, q1, q2, q3}, [r0] ; CHECK-NEXT: add sp, #216 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}