diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -439,6 +439,8 @@ let Inst{19-17} = RdaLo{3-1}; let Inst{11-9} = RdaHi{3-1}; + + let hasSideEffects = 0; } class MVE_ScalarShiftDRegImm op5_4, bit op16, @@ -756,6 +758,7 @@ let horizontalReduction = 1; let Predicates = [HasMVEFloat]; + let hasSideEffects = 0; } multiclass MVE_VMINMAXNMV_p @@ -5227,6 +5234,7 @@ let Inst{8} = 0b0; let Inst{5} = 0b0; let validForTailPredication = 1; + let hasSideEffects = 0; } multiclass MVE_VMLA_qr_multi; @@ -5410,6 +5419,7 @@ let Inst{3-1} = Rm{3-1}; let Inst{0} = imm{0}; let validForTailPredication = 1; + let hasSideEffects = 0; } def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>; @@ -5478,6 +5488,8 @@ let Inst{12-5} = 0b01111000; let Inst{4} = idx2; let Inst{3-0} = Rt{3-0}; + + let hasSideEffects = 0; } // The assembly syntax for these instructions mentions the vector diff --git a/llvm/test/CodeGen/Thumb2/mve-vld3.ll b/llvm/test/CodeGen/Thumb2/mve-vld3.ll --- a/llvm/test/CodeGen/Thumb2/mve-vld3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vld3.ll @@ -84,17 +84,17 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-NEXT: vldrw.u32 q0, [r0, #64] ; CHECK-NEXT: vldrw.u32 q2, [r0, #80] +; CHECK-NEXT: vldrw.u32 q0, [r0, #64] ; CHECK-NEXT: vmov.f32 s12, s5 -; CHECK-NEXT: vmov.f32 s13, s0 ; CHECK-NEXT: vmov.32 r2, q2[2] +; CHECK-NEXT: vmov.f32 s13, s0 ; CHECK-NEXT: vdup.32 q4, r2 ; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vmov.f32 s15, s19 ; CHECK-NEXT: vmov.32 r2, q2[1] -; CHECK-NEXT: vmov.f64 d8, d2 +; CHECK-NEXT: vmov.f32 s15, s19 ; CHECK-NEXT: vdup.32 q5, r2 +; CHECK-NEXT: vmov.f64 d8, d2 ; CHECK-NEXT: vmov.f32 s17, s7 ; CHECK-NEXT: vmov.f32 s18, s2 ; CHECK-NEXT: vmov.f32 s0, s6 @@ -108,6 +108,7 @@ ; CHECK-NEXT: vadd.i32 q0, q3, q0 ; CHECK-NEXT: vldrw.u32 q3, [r0, #32] ; 
CHECK-NEXT: vmov.f32 s16, s9 +; CHECK-NEXT: vstrw.32 q0, [r1, #16] ; CHECK-NEXT: vmov.f32 s17, s4 ; CHECK-NEXT: vmov.32 r0, q3[2] ; CHECK-NEXT: vdup.32 q5, r0 @@ -116,7 +117,6 @@ ; CHECK-NEXT: vmov.32 r0, q3[1] ; CHECK-NEXT: vmov.f64 d10, d4 ; CHECK-NEXT: vdup.32 q6, r0 -; CHECK-NEXT: vstrw.32 q0, [r1, #16] ; CHECK-NEXT: vmov.f32 s21, s11 ; CHECK-NEXT: vmov.f32 s22, s6 ; CHECK-NEXT: vmov.f32 s4, s10 @@ -148,17 +148,17 @@ ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-NEXT: vldrw.u32 q0, [r0, #64] ; CHECK-NEXT: vldrw.u32 q2, [r0, #80] +; CHECK-NEXT: vldrw.u32 q0, [r0, #64] ; CHECK-NEXT: vmov.f32 s12, s5 -; CHECK-NEXT: vmov.f32 s13, s0 ; CHECK-NEXT: vmov.32 r2, q2[2] +; CHECK-NEXT: vmov.f32 s13, s0 ; CHECK-NEXT: vdup.32 q4, r2 ; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vmov.f32 s15, s19 ; CHECK-NEXT: vmov.32 r2, q2[1] -; CHECK-NEXT: vmov.f64 d8, d2 +; CHECK-NEXT: vmov.f32 s15, s19 ; CHECK-NEXT: vdup.32 q5, r2 +; CHECK-NEXT: vmov.f64 d8, d2 ; CHECK-NEXT: vmov.f32 s17, s7 ; CHECK-NEXT: vmov.f32 s18, s2 ; CHECK-NEXT: vmov.f32 s0, s6 @@ -181,6 +181,7 @@ ; CHECK-NEXT: vmov.32 r2, q3[1] ; CHECK-NEXT: vmov.f64 d10, d4 ; CHECK-NEXT: vdup.32 q6, r2 +; CHECK-NEXT: vldrw.u32 q0, [r0, #128] ; CHECK-NEXT: vmov.f32 s21, s11 ; CHECK-NEXT: vmov.f32 s22, s6 ; CHECK-NEXT: vmov.f32 s4, s10 @@ -202,7 +203,6 @@ ; CHECK-NEXT: vmov.32 r2, q4[1] ; CHECK-NEXT: vmov.f64 d12, d6 ; CHECK-NEXT: vdup.32 q7, r2 -; CHECK-NEXT: vldrw.u32 q0, [r0, #128] ; CHECK-NEXT: vmov.f32 s25, s15 ; CHECK-NEXT: vmov.f32 s26, s10 ; CHECK-NEXT: vmov.f32 s8, s14 @@ -1125,17 +1125,17 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-NEXT: vldrw.u32 q0, [r0, #64] ; CHECK-NEXT: vldrw.u32 q2, [r0, #80] +; CHECK-NEXT: vldrw.u32 q0, [r0, #64] ; CHECK-NEXT: vmov.f32 s12, s5 -; CHECK-NEXT: vmov.f32 s13, s0 ; CHECK-NEXT: vmov.32 r2, q2[2] +; CHECK-NEXT: vmov.f32 s13, s0 ; 
CHECK-NEXT: vdup.32 q4, r2 ; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vmov.f32 s15, s19 ; CHECK-NEXT: vmov.32 r2, q2[1] -; CHECK-NEXT: vmov.f64 d8, d2 +; CHECK-NEXT: vmov.f32 s15, s19 ; CHECK-NEXT: vdup.32 q5, r2 +; CHECK-NEXT: vmov.f64 d8, d2 ; CHECK-NEXT: vmov.f32 s17, s7 ; CHECK-NEXT: vmov.f32 s18, s2 ; CHECK-NEXT: vmov.f32 s0, s6 @@ -1149,6 +1149,7 @@ ; CHECK-NEXT: vadd.f32 q0, q3, q0 ; CHECK-NEXT: vldrw.u32 q3, [r0, #32] ; CHECK-NEXT: vmov.f32 s16, s9 +; CHECK-NEXT: vstrw.32 q0, [r1, #16] ; CHECK-NEXT: vmov.f32 s17, s4 ; CHECK-NEXT: vmov.32 r0, q3[2] ; CHECK-NEXT: vdup.32 q5, r0 @@ -1157,7 +1158,6 @@ ; CHECK-NEXT: vmov.32 r0, q3[1] ; CHECK-NEXT: vmov.f64 d10, d4 ; CHECK-NEXT: vdup.32 q6, r0 -; CHECK-NEXT: vstrw.32 q0, [r1, #16] ; CHECK-NEXT: vmov.f32 s21, s11 ; CHECK-NEXT: vmov.f32 s22, s6 ; CHECK-NEXT: vmov.f32 s4, s10 @@ -1189,17 +1189,17 @@ ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-NEXT: vldrw.u32 q0, [r0, #64] ; CHECK-NEXT: vldrw.u32 q2, [r0, #80] +; CHECK-NEXT: vldrw.u32 q0, [r0, #64] ; CHECK-NEXT: vmov.f32 s12, s5 -; CHECK-NEXT: vmov.f32 s13, s0 ; CHECK-NEXT: vmov.32 r2, q2[2] +; CHECK-NEXT: vmov.f32 s13, s0 ; CHECK-NEXT: vdup.32 q4, r2 ; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vmov.f32 s15, s19 ; CHECK-NEXT: vmov.32 r2, q2[1] -; CHECK-NEXT: vmov.f64 d8, d2 +; CHECK-NEXT: vmov.f32 s15, s19 ; CHECK-NEXT: vdup.32 q5, r2 +; CHECK-NEXT: vmov.f64 d8, d2 ; CHECK-NEXT: vmov.f32 s17, s7 ; CHECK-NEXT: vmov.f32 s18, s2 ; CHECK-NEXT: vmov.f32 s0, s6 @@ -1222,6 +1222,7 @@ ; CHECK-NEXT: vmov.32 r2, q3[1] ; CHECK-NEXT: vmov.f64 d10, d4 ; CHECK-NEXT: vdup.32 q6, r2 +; CHECK-NEXT: vldrw.u32 q0, [r0, #128] ; CHECK-NEXT: vmov.f32 s21, s11 ; CHECK-NEXT: vmov.f32 s22, s6 ; CHECK-NEXT: vmov.f32 s4, s10 @@ -1243,7 +1244,6 @@ ; CHECK-NEXT: vmov.32 r2, q4[1] ; CHECK-NEXT: vmov.f64 d12, d6 ; CHECK-NEXT: vdup.32 q7, r2 -; CHECK-NEXT: vldrw.u32 q0, [r0, #128] ; CHECK-NEXT: vmov.f32 s25, s15 ; CHECK-NEXT: vmov.f32 s26, 
s10 ; CHECK-NEXT: vmov.f32 s8, s14 diff --git a/llvm/test/CodeGen/Thumb2/mve-vld4.ll b/llvm/test/CodeGen/Thumb2/mve-vld4.ll --- a/llvm/test/CodeGen/Thumb2/mve-vld4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vld4.ll @@ -22,10 +22,10 @@ ; CHECK-NEXT: add r2, r3 ; CHECK-NEXT: vmov r3, s0 ; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: str r0, [r1, #4] ; CHECK-NEXT: vmov.32 r2, q0[2] -; CHECK-NEXT: vmov r0, s8 ; CHECK-NEXT: vdup.32 q1, r2 +; CHECK-NEXT: str r0, [r1, #4] +; CHECK-NEXT: vmov r0, s8 ; CHECK-NEXT: vmov r2, s4 ; CHECK-NEXT: add r0, r2 ; CHECK-NEXT: vmov r2, s12 diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll --- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -93,20 +93,21 @@ ; CHECK-NEXT: vldrw.u32 q0, [r0, #80] ; CHECK-NEXT: vldrw.u32 q5, [r0, #16] ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-NEXT: vmov.f64 d6, d1 -; CHECK-NEXT: vmov.32 r2, q1[3] ; CHECK-NEXT: vldrw.u32 q4, [r0] +; CHECK-NEXT: vmov.f64 d6, d1 ; CHECK-NEXT: vldrw.u32 q6, [r0, #32] -; CHECK-NEXT: vdup.32 q2, r2 +; CHECK-NEXT: vmov.32 r2, q1[3] ; CHECK-NEXT: vldrw.u32 q7, [r0, #64] +; CHECK-NEXT: vdup.32 q2, r2 ; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill +; CHECK-NEXT: vstrw.32 q5, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vmov.f32 s13, s23 ; CHECK-NEXT: vstrw.32 q6, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vmov.f32 s15, s3 -; CHECK-NEXT: vstrw.32 q5, [sp] @ 16-byte Spill ; CHECK-NEXT: vmov.f32 s14, s10 -; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vmov.f64 d4, d8 +; CHECK-NEXT: vstrw.32 q3, [r1, #80] ; CHECK-NEXT: vmov.f32 s9, s24 ; CHECK-NEXT: vmov.f32 s11, s17 ; CHECK-NEXT: vmov q4, q5 @@ -120,20 +121,19 @@ ; CHECK-NEXT: vmov.f32 s3, s6 ; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload ; CHECK-NEXT: vdup.32 q6, r0 +; CHECK-NEXT: vmov.32 r0, q4[3] ; CHECK-NEXT: vmov.f32 s22, s26 ; CHECK-NEXT: vldrw.u32 q6, [sp, #32] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.32 r0, 
q4[3] -; CHECK-NEXT: vmov.f32 s4, s17 ; CHECK-NEXT: vstrw.32 q5, [r1, #48] -; CHECK-NEXT: vmov.f32 s5, s29 +; CHECK-NEXT: vmov.f32 s4, s17 ; CHECK-NEXT: vstrw.32 q0, [r1, #64] +; CHECK-NEXT: vmov.f32 s5, s29 +; CHECK-NEXT: vstrw.32 q2, [r1] ; CHECK-NEXT: vmov.f32 s28, s30 -; CHECK-NEXT: vstrw.32 q3, [r1, #80] ; CHECK-NEXT: vmov.f32 s7, s18 ; CHECK-NEXT: vdup.32 q4, r0 ; CHECK-NEXT: vmov.f32 s29, s27 -; CHECK-NEXT: vstrw.32 q2, [r1] ; CHECK-NEXT: vmov.f32 s6, s26 ; CHECK-NEXT: vmov.f32 s30, s18 ; CHECK-NEXT: vstrw.32 q1, [r1, #16] @@ -185,32 +185,32 @@ ; CHECK-NEXT: vstrw.32 q2, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vmov.f64 d4, d15 ; CHECK-NEXT: vldrw.u32 q4, [r0, #176] -; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [r1, #16] ; CHECK-NEXT: vmov.32 r0, q6[3] +; CHECK-NEXT: vstrw.32 q1, [r1, #16] ; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vmov.32 r0, q3[3] +; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vmov.f32 s9, s23 ; CHECK-NEXT: vmov.f32 s11, s31 ; CHECK-NEXT: vmov.f32 s10, s6 ; CHECK-NEXT: vmov.f64 d2, d9 ; CHECK-NEXT: vstrw.32 q2, [r1, #32] -; CHECK-NEXT: vmov.32 r0, q3[3] ; CHECK-NEXT: vmov.f32 s5, s3 ; CHECK-NEXT: vmov.f32 s7, s19 ; CHECK-NEXT: vmov q2, q1 ; CHECK-NEXT: vdup.32 q1, r0 ; CHECK-NEXT: vmov.f32 s10, s6 +; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vmov.f64 d2, d10 ; CHECK-NEXT: vstrw.32 q2, [sp, #80] @ 16-byte Spill -; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload ; CHECK-NEXT: vdup.32 q0, r0 +; CHECK-NEXT: vmov.32 r0, q4[0] ; CHECK-NEXT: vmov.f32 s5, s24 ; CHECK-NEXT: vmov.f32 s7, s21 ; CHECK-NEXT: vmov.f32 s6, s2 ; CHECK-NEXT: vmov.f64 d0, d4 ; CHECK-NEXT: vstrw.32 q1, [sp, #64] @ 16-byte Spill -; CHECK-NEXT: vmov.32 r0, q4[0] ; CHECK-NEXT: vmov.f32 s1, s12 ; CHECK-NEXT: vmov.f32 s3, s9 ; CHECK-NEXT: vmov q1, q0 @@ -227,9 +227,9 @@ ; CHECK-NEXT: vmov.f32 s27, s6 ; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s18, s10 
+; CHECK-NEXT: vmov.32 r0, q3[3] ; CHECK-NEXT: vmov.f64 d4, d1 ; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vmov.32 r0, q3[3] ; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vldrw.u32 q4, [sp, #144] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s9, s7 @@ -242,6 +242,8 @@ ; CHECK-NEXT: vmov.f64 d6, d1 ; CHECK-NEXT: vmov.32 r0, q4[3] ; CHECK-NEXT: vmov q7, q0 +; CHECK-NEXT: vstrw.32 q6, [r1, #112] +; CHECK-NEXT: vstrw.32 q2, [r1, #128] ; CHECK-NEXT: vmov.f32 s13, s7 ; CHECK-NEXT: vldrw.u32 q1, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s15, s3 @@ -249,18 +251,16 @@ ; CHECK-NEXT: vmov.f32 s14, s2 ; CHECK-NEXT: vmov q0, q5 ; CHECK-NEXT: vmov.f32 s21, s4 +; CHECK-NEXT: vstrw.32 q3, [r1, #80] ; CHECK-NEXT: vmov.f32 s23, s1 ; CHECK-NEXT: vldrw.u32 q0, [sp, #112] @ 16-byte Reload ; CHECK-NEXT: vmov.32 r0, q0[0] ; CHECK-NEXT: vdup.32 q0, r0 +; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vmov.f32 s22, s2 ; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload -; CHECK-NEXT: vmov.32 r0, q7[0] -; CHECK-NEXT: vstrw.32 q3, [r1, #80] -; CHECK-NEXT: vmov.f64 d2, d0 ; CHECK-NEXT: vstrw.32 q5, [r1, #96] -; CHECK-NEXT: vstrw.32 q6, [r1, #112] -; CHECK-NEXT: vstrw.32 q2, [r1, #128] +; CHECK-NEXT: vmov.f64 d2, d0 ; CHECK-NEXT: vmov.f32 s5, s16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov.f32 s7, s1 @@ -503,13 +503,14 @@ ; CHECK-NEXT: vmov q7, q1 ; CHECK-NEXT: vmov.f32 s9, s0 ; CHECK-NEXT: vldrw.u32 q0, [r0, #80] -; CHECK-NEXT: vstrw.32 q2, [sp, #96] @ 16-byte Spill ; CHECK-NEXT: vmov.u16 r2, q2[3] +; CHECK-NEXT: vmov q3, q2 ; CHECK-NEXT: vmov.32 r3, q0[0] ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vstrw.32 q2, [sp, #96] @ 16-byte Spill ; CHECK-NEXT: vmov.u16 r3, q0[2] +; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vmov.16 q2[2], r3 ; CHECK-NEXT: vmov.16 q2[3], r2 ; CHECK-NEXT: vmov.u16 r2, q3[4] @@ -578,17 +579,17 @@ ; CHECK-NEXT: vmov.16 q0[6], r0 ; CHECK-NEXT: vmov.u16 r0, q1[7] ; CHECK-NEXT: 
vldrw.u32 q1, [sp, #128] @ 16-byte Reload -; CHECK-NEXT: vstrw.32 q2, [sp, #64] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vmov.32 r2, q1[3] ; CHECK-NEXT: vmov.f32 s2, s19 -; CHECK-NEXT: vdup.32 q7, r2 +; CHECK-NEXT: vstrw.32 q2, [sp, #64] @ 16-byte Spill +; CHECK-NEXT: vmov.32 r2, q1[3] ; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.u16 r2, q7[2] +; CHECK-NEXT: vdup.32 q7, r2 ; CHECK-NEXT: vrev32.16 q3, q3 -; CHECK-NEXT: vmov.16 q2[2], r2 +; CHECK-NEXT: vmov.u16 r2, q7[2] ; CHECK-NEXT: vstrw.32 q3, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.16 q2[2], r2 +; CHECK-NEXT: vstrw.32 q4, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vmov.16 q2[3], r0 ; CHECK-NEXT: vmov.u16 r0, q0[4] ; CHECK-NEXT: vmov.16 q2[4], r0 @@ -603,7 +604,6 @@ ; CHECK-NEXT: vmov.16 q3[6], r0 ; CHECK-NEXT: vmov.u16 r0, q1[5] ; CHECK-NEXT: vldrw.u32 q1, [sp, #144] @ 16-byte Reload -; CHECK-NEXT: vstrw.32 q4, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q4, [sp, #48] @ 16-byte Reload ; CHECK-NEXT: vrev32.16 q7, q7 ; CHECK-NEXT: vmov.16 q3[7], r0 @@ -611,34 +611,35 @@ ; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vmov.16 q7[0], r0 ; CHECK-NEXT: vmov.u16 r0, q4[3] +; CHECK-NEXT: vmov.f32 s1, s9 ; CHECK-NEXT: vmov.16 q7[1], r0 ; CHECK-NEXT: vmov.u16 r0, q1[4] ; CHECK-NEXT: vldrw.u32 q1, [sp, #64] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s1, s9 ; CHECK-NEXT: vmov.f32 s2, s10 ; CHECK-NEXT: vldrw.u32 q2, [sp, #96] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s25, s5 ; CHECK-NEXT: vmov.16 q7[6], r0 +; CHECK-NEXT: vmov.f32 s25, s5 +; CHECK-NEXT: vmov.u16 r0, q4[5] ; CHECK-NEXT: vmov.f32 s26, s6 ; CHECK-NEXT: vldrw.u32 q1, [sp, #80] @ 16-byte Reload -; CHECK-NEXT: vmov.u16 r0, q4[5] ; CHECK-NEXT: vstrw.32 q0, [r1, #80] -; CHECK-NEXT: vmov.f32 s21, s5 ; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s21, s5 +; CHECK-NEXT: vmov.16 q7[7], r0 ; CHECK-NEXT: vmov.f32 s22, s6 ; CHECK-NEXT: vldrw.u32 
q1, [sp, #112] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q7[7], r0 ; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: vmov.f32 s9, s5 ; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov.f32 s9, s5 +; CHECK-NEXT: vmov.16 q0[2], r2 ; CHECK-NEXT: vmov.f32 s10, s6 ; CHECK-NEXT: vldrw.u32 q1, [sp, #144] @ 16-byte Reload -; CHECK-NEXT: vmov.16 q0[2], r2 ; CHECK-NEXT: vstrw.32 q5, [r1, #32] +; CHECK-NEXT: vstrw.32 q2, [r1, #48] ; CHECK-NEXT: vmov.f32 s29, s5 ; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s30, s18 -; CHECK-NEXT: vstrw.32 q2, [r1, #48] +; CHECK-NEXT: vstrw.32 q6, [r1] ; CHECK-NEXT: vmov.u16 r2, q7[3] ; CHECK-NEXT: vmov.f32 s13, s5 ; CHECK-NEXT: vmov.16 q0[3], r2 @@ -646,7 +647,6 @@ ; CHECK-NEXT: vmov.16 q0[4], r2 ; CHECK-NEXT: vldrw.u32 q1, [sp, #128] @ 16-byte Reload ; CHECK-NEXT: vmov.16 q0[5], r0 -; CHECK-NEXT: vstrw.32 q6, [r1] ; CHECK-NEXT: vmov.f32 s29, s1 ; CHECK-NEXT: vmov.f32 s30, s2 ; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload @@ -1251,20 +1251,21 @@ ; CHECK-NEXT: vldrw.u32 q0, [r0, #80] ; CHECK-NEXT: vldrw.u32 q5, [r0, #16] ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-NEXT: vmov.f64 d6, d1 -; CHECK-NEXT: vmov.32 r2, q1[3] ; CHECK-NEXT: vldrw.u32 q4, [r0] +; CHECK-NEXT: vmov.f64 d6, d1 ; CHECK-NEXT: vldrw.u32 q6, [r0, #32] -; CHECK-NEXT: vdup.32 q2, r2 +; CHECK-NEXT: vmov.32 r2, q1[3] ; CHECK-NEXT: vldrw.u32 q7, [r0, #64] +; CHECK-NEXT: vdup.32 q2, r2 ; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill +; CHECK-NEXT: vstrw.32 q5, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vmov.f32 s13, s23 ; CHECK-NEXT: vstrw.32 q6, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vmov.f32 s15, s3 -; CHECK-NEXT: vstrw.32 q5, [sp] @ 16-byte Spill ; CHECK-NEXT: vmov.f32 s14, s10 -; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vmov.f64 d4, d8 +; CHECK-NEXT: vstrw.32 q3, [r1, #80] ; CHECK-NEXT: vmov.f32 s9, s24 ; CHECK-NEXT: vmov.f32 s11, s17 ; CHECK-NEXT: vmov q4, q5 @@ -1278,20 +1279,19 @@ ; CHECK-NEXT: vmov.f32 
s3, s6 ; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload ; CHECK-NEXT: vdup.32 q6, r0 +; CHECK-NEXT: vmov.32 r0, q4[3] ; CHECK-NEXT: vmov.f32 s22, s26 ; CHECK-NEXT: vldrw.u32 q6, [sp, #32] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.32 r0, q4[3] -; CHECK-NEXT: vmov.f32 s4, s17 ; CHECK-NEXT: vstrw.32 q5, [r1, #48] -; CHECK-NEXT: vmov.f32 s5, s29 +; CHECK-NEXT: vmov.f32 s4, s17 ; CHECK-NEXT: vstrw.32 q0, [r1, #64] +; CHECK-NEXT: vmov.f32 s5, s29 +; CHECK-NEXT: vstrw.32 q2, [r1] ; CHECK-NEXT: vmov.f32 s28, s30 -; CHECK-NEXT: vstrw.32 q3, [r1, #80] ; CHECK-NEXT: vmov.f32 s7, s18 ; CHECK-NEXT: vdup.32 q4, r0 ; CHECK-NEXT: vmov.f32 s29, s27 -; CHECK-NEXT: vstrw.32 q2, [r1] ; CHECK-NEXT: vmov.f32 s6, s26 ; CHECK-NEXT: vmov.f32 s30, s18 ; CHECK-NEXT: vstrw.32 q1, [r1, #16] @@ -1343,32 +1343,32 @@ ; CHECK-NEXT: vstrw.32 q2, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vmov.f64 d4, d15 ; CHECK-NEXT: vldrw.u32 q4, [r0, #176] -; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [r1, #16] ; CHECK-NEXT: vmov.32 r0, q6[3] +; CHECK-NEXT: vstrw.32 q1, [r1, #16] ; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vmov.32 r0, q3[3] +; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vmov.f32 s9, s23 ; CHECK-NEXT: vmov.f32 s11, s31 ; CHECK-NEXT: vmov.f32 s10, s6 ; CHECK-NEXT: vmov.f64 d2, d9 ; CHECK-NEXT: vstrw.32 q2, [r1, #32] -; CHECK-NEXT: vmov.32 r0, q3[3] ; CHECK-NEXT: vmov.f32 s5, s3 ; CHECK-NEXT: vmov.f32 s7, s19 ; CHECK-NEXT: vmov q2, q1 ; CHECK-NEXT: vdup.32 q1, r0 ; CHECK-NEXT: vmov.f32 s10, s6 +; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vmov.f64 d2, d10 ; CHECK-NEXT: vstrw.32 q2, [sp, #80] @ 16-byte Spill -; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload ; CHECK-NEXT: vdup.32 q0, r0 +; CHECK-NEXT: vmov.32 r0, q4[0] ; CHECK-NEXT: vmov.f32 s5, s24 ; CHECK-NEXT: vmov.f32 s7, s21 ; CHECK-NEXT: vmov.f32 s6, s2 ; CHECK-NEXT: vmov.f64 d0, d4 ; CHECK-NEXT: vstrw.32 q1, [sp, #64] @ 
16-byte Spill -; CHECK-NEXT: vmov.32 r0, q4[0] ; CHECK-NEXT: vmov.f32 s1, s12 ; CHECK-NEXT: vmov.f32 s3, s9 ; CHECK-NEXT: vmov q1, q0 @@ -1385,9 +1385,9 @@ ; CHECK-NEXT: vmov.f32 s27, s6 ; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s18, s10 +; CHECK-NEXT: vmov.32 r0, q3[3] ; CHECK-NEXT: vmov.f64 d4, d1 ; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vmov.32 r0, q3[3] ; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vldrw.u32 q4, [sp, #144] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s9, s7 @@ -1400,6 +1400,8 @@ ; CHECK-NEXT: vmov.f64 d6, d1 ; CHECK-NEXT: vmov.32 r0, q4[3] ; CHECK-NEXT: vmov q7, q0 +; CHECK-NEXT: vstrw.32 q6, [r1, #112] +; CHECK-NEXT: vstrw.32 q2, [r1, #128] ; CHECK-NEXT: vmov.f32 s13, s7 ; CHECK-NEXT: vldrw.u32 q1, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s15, s3 @@ -1407,18 +1409,16 @@ ; CHECK-NEXT: vmov.f32 s14, s2 ; CHECK-NEXT: vmov q0, q5 ; CHECK-NEXT: vmov.f32 s21, s4 +; CHECK-NEXT: vstrw.32 q3, [r1, #80] ; CHECK-NEXT: vmov.f32 s23, s1 ; CHECK-NEXT: vldrw.u32 q0, [sp, #112] @ 16-byte Reload ; CHECK-NEXT: vmov.32 r0, q0[0] ; CHECK-NEXT: vdup.32 q0, r0 +; CHECK-NEXT: vmov.32 r0, q7[0] ; CHECK-NEXT: vmov.f32 s22, s2 ; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload -; CHECK-NEXT: vmov.32 r0, q7[0] -; CHECK-NEXT: vstrw.32 q3, [r1, #80] -; CHECK-NEXT: vmov.f64 d2, d0 ; CHECK-NEXT: vstrw.32 q5, [r1, #96] -; CHECK-NEXT: vstrw.32 q6, [r1, #112] -; CHECK-NEXT: vstrw.32 q2, [r1, #128] +; CHECK-NEXT: vmov.f64 d2, d0 ; CHECK-NEXT: vmov.f32 s5, s16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov.f32 s7, s1 @@ -1573,11 +1573,11 @@ ; CHECK-NEXT: vmovx.f16 s12, s4 ; CHECK-NEXT: vmov r2, s4 ; CHECK-NEXT: vmov.16 q0[0], r3 -; CHECK-NEXT: vmov.32 r0, q5[0] ; CHECK-NEXT: vmov.16 q0[1], r2 ; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vdup.32 q4, r0 +; CHECK-NEXT: vmov.32 r0, q5[0] ; CHECK-NEXT: vmov.16 q0[4], r2 +; CHECK-NEXT: vdup.32 q4, r0 ; CHECK-NEXT: vmov r2, s9 ; CHECK-NEXT: vmov.16 q0[6], r2 ; CHECK-NEXT: 
vmov r2, s5 @@ -1606,17 +1606,17 @@ ; CHECK-NEXT: vmovx.f16 s24, s23 ; CHECK-NEXT: vmov.16 q4[6], r0 ; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.16 q4[7], r0 ; CHECK-NEXT: vmov.32 r2, q2[3] -; CHECK-NEXT: vmov.f32 s18, s23 +; CHECK-NEXT: vmov.16 q4[7], r0 ; CHECK-NEXT: vdup.32 q7, r2 +; CHECK-NEXT: vmov.f32 s18, s23 ; CHECK-NEXT: vmovx.f16 s24, s17 ; CHECK-NEXT: vmov r2, s29 +; CHECK-NEXT: vmovx.f16 s28, s30 ; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: vmov.16 q6[2], r2 ; CHECK-NEXT: vmov.16 q6[3], r0 -; CHECK-NEXT: vmovx.f16 s28, s30 +; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: vmovx.f16 s4, s10 ; CHECK-NEXT: vmov.f32 s1, s13 ; CHECK-NEXT: vmov.f32 s2, s14 @@ -1696,12 +1696,12 @@ ; CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: vldrw.u32 q0, [r0, #64] ; CHECK-NEXT: vmov.f32 s5, s12 +; CHECK-NEXT: vmov q5, q3 ; CHECK-NEXT: vstrw.32 q3, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp, #128] @ 16-byte Spill ; CHECK-NEXT: vmov.32 r3, q0[0] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: vmov q5, q3 +; CHECK-NEXT: vstrw.32 q1, [sp, #128] @ 16-byte Spill ; CHECK-NEXT: vmov r3, s1 ; CHECK-NEXT: vmovx.f16 s0, s2 ; CHECK-NEXT: vmov.16 q3[2], r3 @@ -1725,23 +1725,23 @@ ; CHECK-NEXT: vmov.16 q1[6], r2 ; CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: vmov.16 q1[7], r2 -; CHECK-NEXT: vstrw.32 q3, [sp, #112] @ 16-byte Spill +; CHECK-NEXT: vmov.32 r3, q5[3] ; CHECK-NEXT: vmov.f32 s6, s19 ; CHECK-NEXT: vmovx.f16 s0, s5 -; CHECK-NEXT: vstrw.32 q4, [sp, #48] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp, #96] @ 16-byte Spill -; CHECK-NEXT: vmov.32 r3, q5[3] ; CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: vldrw.u32 q5, [r0, #16] ; CHECK-NEXT: vmov r3, s1 ; CHECK-NEXT: vmovx.f16 s0, s2 ; CHECK-NEXT: vmov.16 q2[2], r3 -; CHECK-NEXT: vmov r3, s20 +; CHECK-NEXT: vldrw.u32 q5, [r0, #16] ; CHECK-NEXT: vmov.16 q2[3], r2 +; CHECK-NEXT: vstrw.32 q3, [sp, #112] @ 16-byte Spill +; 
CHECK-NEXT: vmov r3, s20 +; CHECK-NEXT: vldrw.u32 q3, [r0, #80] ; CHECK-NEXT: vmov.16 q7[0], r3 +; CHECK-NEXT: vstrw.32 q1, [sp, #96] @ 16-byte Spill ; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vldrw.u32 q3, [r0, #80] +; CHECK-NEXT: vstrw.32 q4, [sp, #48] @ 16-byte Spill ; CHECK-NEXT: vmov.16 q2[4], r2 ; CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: vmov.16 q2[5], r2 diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -936,59 +936,14 @@ TEST(MachineInstr, HasSideEffects) { using namespace ARM; - unsigned Opcodes[] = { - // MVE Loads/Stores - MVE_VLDRBS16, MVE_VLDRBS16_post, MVE_VLDRBS16_pre, - MVE_VLDRBS16_rq, MVE_VLDRBS32, MVE_VLDRBS32_post, - MVE_VLDRBS32_pre, MVE_VLDRBS32_rq, MVE_VLDRBU16, - MVE_VLDRBU16_post, MVE_VLDRBU16_pre, MVE_VLDRBU16_rq, - MVE_VLDRBU32, MVE_VLDRBU32_post, MVE_VLDRBU32_pre, - MVE_VLDRBU32_rq, MVE_VLDRBU8, MVE_VLDRBU8_post, - MVE_VLDRBU8_pre, MVE_VLDRBU8_rq, MVE_VLDRDU64_qi, - MVE_VLDRDU64_qi_pre, MVE_VLDRDU64_rq, MVE_VLDRDU64_rq_u, - MVE_VLDRHS32, MVE_VLDRHS32_post, MVE_VLDRHS32_pre, - MVE_VLDRHS32_rq, MVE_VLDRHS32_rq_u, MVE_VLDRHU16, - MVE_VLDRHU16_post, MVE_VLDRHU16_pre, MVE_VLDRHU16_rq, - MVE_VLDRHU16_rq_u, MVE_VLDRHU32, MVE_VLDRHU32_post, - MVE_VLDRHU32_pre, MVE_VLDRHU32_rq, MVE_VLDRHU32_rq_u, - MVE_VLDRWU32, MVE_VLDRWU32_post, MVE_VLDRWU32_pre, - MVE_VLDRWU32_qi, MVE_VLDRWU32_qi_pre, MVE_VLDRWU32_rq, - MVE_VLDRWU32_rq_u, MVE_VLD20_16, MVE_VLD20_16_wb, - MVE_VLD20_32, MVE_VLD20_32_wb, MVE_VLD20_8, - MVE_VLD20_8_wb, MVE_VLD21_16, MVE_VLD21_16_wb, - MVE_VLD21_32, MVE_VLD21_32_wb, MVE_VLD21_8, - MVE_VLD21_8_wb, MVE_VLD40_16, MVE_VLD40_16_wb, - MVE_VLD40_32, MVE_VLD40_32_wb, MVE_VLD40_8, - MVE_VLD40_8_wb, MVE_VLD41_16, MVE_VLD41_16_wb, - MVE_VLD41_32, MVE_VLD41_32_wb, MVE_VLD41_8, - MVE_VLD41_8_wb, MVE_VLD42_16, MVE_VLD42_16_wb, - MVE_VLD42_32, MVE_VLD42_32_wb, MVE_VLD42_8, - 
MVE_VLD42_8_wb, MVE_VLD43_16, MVE_VLD43_16_wb, - MVE_VLD43_32, MVE_VLD43_32_wb, MVE_VLD43_8, - MVE_VLD43_8_wb, MVE_VSTRB16, MVE_VSTRB16_post, - MVE_VSTRB16_pre, MVE_VSTRB16_rq, MVE_VSTRB32, - MVE_VSTRB32_post, MVE_VSTRB32_pre, MVE_VSTRB32_rq, - MVE_VSTRB8_rq, MVE_VSTRBU8, MVE_VSTRBU8_post, - MVE_VSTRBU8_pre, MVE_VSTRD64_qi, MVE_VSTRD64_qi_pre, - MVE_VSTRD64_rq, MVE_VSTRD64_rq_u, MVE_VSTRH16_rq, - MVE_VSTRH16_rq_u, MVE_VSTRH32, MVE_VSTRH32_post, - MVE_VSTRH32_pre, MVE_VSTRH32_rq, MVE_VSTRH32_rq_u, - MVE_VSTRHU16, MVE_VSTRHU16_post, MVE_VSTRHU16_pre, - MVE_VSTRW32_qi, MVE_VSTRW32_qi_pre, MVE_VSTRW32_rq, - MVE_VSTRW32_rq_u, MVE_VSTRWU32, MVE_VSTRWU32_post, - MVE_VSTRWU32_pre, MVE_VST20_16, MVE_VST20_16_wb, - MVE_VST20_32, MVE_VST20_32_wb, MVE_VST20_8, - MVE_VST20_8_wb, MVE_VST21_16, MVE_VST21_16_wb, - MVE_VST21_32, MVE_VST21_32_wb, MVE_VST21_8, - MVE_VST21_8_wb, MVE_VST40_16, MVE_VST40_16_wb, - MVE_VST40_32, MVE_VST40_32_wb, MVE_VST40_8, - MVE_VST40_8_wb, MVE_VST41_16, MVE_VST41_16_wb, - MVE_VST41_32, MVE_VST41_32_wb, MVE_VST41_8, - MVE_VST41_8_wb, MVE_VST42_16, MVE_VST42_16_wb, - MVE_VST42_32, MVE_VST42_32_wb, MVE_VST42_8, - MVE_VST42_8_wb, MVE_VST43_16, MVE_VST43_16_wb, - MVE_VST43_32, MVE_VST43_32_wb, MVE_VST43_8, - MVE_VST43_8_wb, + std::set<unsigned> UnpredictableOpcodes = { + MVE_VCTP8, MVE_VCTP16, MVE_VCTP32, MVE_VCTP64, MVE_VPST, + MVE_VPTv16i8, MVE_VPTv8i16, MVE_VPTv4i32, MVE_VPTv16i8r, MVE_VPTv8i16r, + MVE_VPTv4i32r, MVE_VPTv16s8, MVE_VPTv8s16, MVE_VPTv4s32, MVE_VPTv16s8r, + MVE_VPTv8s16r, MVE_VPTv4s32r, MVE_VPTv16u8, MVE_VPTv8u16, MVE_VPTv4u32, + MVE_VPTv16u8r, MVE_VPTv8u16r, MVE_VPTv4u32r, MVE_VPTv8f16, MVE_VPTv4f32, + MVE_VPTv8f16r, MVE_VPTv4f32r, MVE_VADC, MVE_VADCI, MVE_VSBC, + MVE_VSBCI, MVE_VSHLC, }; LLVMInitializeARMTargetInfo(); @@ -1013,8 +968,13 @@ const ARMBaseInstrInfo *TII = ST.getInstrInfo(); auto MII = TM->getMCInstrInfo(); - for (unsigned Op : Opcodes) { + for (unsigned Op = 0; Op < ARM::INSTRUCTION_LIST_END; ++Op) { const MCInstrDesc &Desc = 
TII->get(Op); + if ((Desc.TSFlags & ARMII::DomainMask) != ARMII::DomainMVE) + continue; + if (UnpredictableOpcodes.count(Op)) + continue; + ASSERT_FALSE(Desc.hasUnmodeledSideEffects()) << MII->getName(Op) << " has unexpected side effects"; }