Index: llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp =================================================================== --- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2289,10 +2289,7 @@ return false; Align Alignment = (*Op0->memoperands_begin())->getAlign(); - const Function &Func = MF->getFunction(); - Align ReqAlign = - STI->hasV6Ops() ? TD->getABITypeAlign(Type::getInt64Ty(Func.getContext())) - : Align(8); // Pre-v6 need 8-byte align + Align ReqAlign = STI->getDualLoadStoreAlignment(); if (Alignment < ReqAlign) return false; Index: llvm/test/CodeGen/ARM/copy-by-struct-i32.ll =================================================================== --- llvm/test/CodeGen/ARM/copy-by-struct-i32.ll +++ llvm/test/CodeGen/ARM/copy-by-struct-i32.ll @@ -21,9 +21,8 @@ ; ASSEMBLY-NEXT: sbc r5, r5, #0 ; ASSEMBLY-NEXT: ldr r2, [r1, #8] ; ASSEMBLY-NEXT: ldr r3, [r1, #12] -; ASSEMBLY-NEXT: str r5, [sp, #132] +; ASSEMBLY-NEXT: strd r4, r5, [sp, #128] ; ASSEMBLY-NEXT: add r5, r1, #16 -; ASSEMBLY-NEXT: str r4, [sp, #128] ; ASSEMBLY-NEXT: mov r4, sp ; ASSEMBLY-NEXT: vld1.32 {d16}, [r5]! ; ASSEMBLY-NEXT: vst1.32 {d16}, [r4]! Index: llvm/test/CodeGen/ARM/ha-alignstack-call.ll =================================================================== --- llvm/test/CodeGen/ARM/ha-alignstack-call.ll +++ llvm/test/CodeGen/ARM/ha-alignstack-call.ll @@ -81,11 +81,10 @@ ret float %call } ; CHECK-LABEL: f1_1_call: -; CHECK: movw r1, #52429 -; CHECK: mov r0, #0 -; CHECK: movt r1, #16204 -; CHECK-DAG: str r1, [sp] -; CHECK-DAG: str r0, [sp, #4] +; CHECK: movw r0, #52429 +; CHECK: mov r1, #0 +; CHECK: movt r0, #16204 +; CHECK-DAG: strd r0, r1, [sp] ; CHECK: bl f1_1 ; pass in memory, alignment 8 @@ -96,13 +95,12 @@ ret float %call } ; CHECK-LABEL: f1_2_call: -; CHECK-DAG: mov r0, #0 -; CHECK-DAG: movw r1, #26214 -; CHECK: str r0, [sp, #12] +; CHECK-DAG: movw r0, #26214 +; CHECK-DAG: mov r1, #0 +; CHECK: movt r0, #16230 +; CHECK: strd r0, r1, [sp, #8] ; CHECK: movw r0, #52429 -; CHECK: movt r1, #16230 ; CHECK: movt r0, #16204 -; CHECK-DAG: str r1, [sp, #8] ; CHECK-DAG: str r0, [sp] ; CHECK: bl f1_2 Index: llvm/test/CodeGen/ARM/indexed-mem.ll =================================================================== --- llvm/test/CodeGen/ARM/indexed-mem.ll +++ llvm/test/CodeGen/ARM/indexed-mem.ll @@ -225,7 +225,8 @@ ; ; CHECK-V8A-LABEL: post_inc_ldrd: ; CHECK-V8A: @ %bb.0: -; CHECK-V8A-NEXT: ldm r0!, {r2, r3} +; CHECK-V8A-NEXT: ldrd r2, r3, [r0] +; CHECK-V8A-NEXT: add r0, r0, #8 ; CHECK-V8A-NEXT: add r2, r2, r3 ; CHECK-V8A-NEXT: str r2, [r1] ; CHECK-V8A-NEXT: bx lr @@ -248,8 +249,8 @@ ; ; CHECK-V8A-LABEL: pre_inc_str_multi: ; CHECK-V8A: @ %bb.0: -; CHECK-V8A-NEXT: ldm r0, {r1, r2} -; CHECK-V8A-NEXT: add r1, r1, r2 +; CHECK-V8A-NEXT: ldrd r2, r3, [r0] +; CHECK-V8A-NEXT: add r1, r2, r3 ; CHECK-V8A-NEXT: str r1, [r0, #8]! ; CHECK-V8A-NEXT: bx lr %addr.1 = getelementptr i32, ptr %base, i32 1 @@ -271,8 +272,8 @@ ; ; CHECK-V8A-LABEL: pre_dec_str_multi: ; CHECK-V8A: @ %bb.0: -; CHECK-V8A-NEXT: ldm r0, {r1, r2} -; CHECK-V8A-NEXT: add r1, r1, r2 +; CHECK-V8A-NEXT: ldrd r2, r3, [r0] +; CHECK-V8A-NEXT: add r1, r2, r3 ; CHECK-V8A-NEXT: str r1, [r0, #-4]! ; CHECK-V8A-NEXT: bx lr %addr.1 = getelementptr i32, ptr %base, i32 1 Index: llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir =================================================================== --- llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir +++ llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir @@ -30,10 +30,8 @@ t2STRi12 killed %2, %0, 0, 14, $noreg :: (store (s32) into %ir.x) %3 : gpr = t2LDRi12 %1, 4, 14, $noreg :: (load (s32) from %ir.arrayidx2) t2STRi12 killed %3, %0, 4, 14, $noreg :: (store (s32) into %ir.arrayidx3) - ; CHECK: t2LDRi12 - ; CHECK-NEXT: t2LDRi12 - ; CHECK-NEXT: t2STRi12 - ; CHECK-NEXT: t2STRi12 + ; CHECK: t2LDRDi8 + ; CHECK-NEXT: t2STRDi8 tBX_RET 14, $noreg ... Index: llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir =================================================================== --- llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir +++ llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir @@ -41,8 +41,7 @@ ; Make sure we move the paired stores next to each other, and ; insert them in an appropriate location. - ; CHECK: t2STRi12 %1, - ; CHECK-NEXT: t2STRi12 killed %10, + ; CHECK: t2STRDi8 %1, %10, ; CHECK-NEXT: t2MOVi ; CHECK-NEXT: t2ADDrs @@ -53,8 +52,7 @@ t2STRi12 killed %13, %0, 20, 14, $noreg :: (store (s32)) ; Make sure we move the paired stores next to each other. - ; CHECK: t2STRi12 killed %12, - ; CHECK-NEXT: t2STRi12 killed %13, + ; CHECK: t2STRDi8 %12, %13, tBX_RET 14, $noreg --- @@ -88,8 +86,7 @@ ; CHECK-NEXT: t2MOVi32imm ; CHECK-LIMIT-LABEL: name: b - ; CHECK-LIMIT: t2STRi12 {{.*}}, 0 - ; CHECK-LIMIT-NEXT: t2STRi12 {{.*}}, 4 + ; CHECK-LIMIT: t2STRDi8 {{.*}}, {{.*}}, {{.*}}, 0 ; CHECK-LIMIT-NEXT: t2MUL ; CHECK-LIMIT-NEXT: t2STRi12 {{.*}}, 8 @@ -105,8 +102,7 @@ t2STRi12 killed %13, %0, 20, 14, $noreg :: (store (s32)) ; Make sure we move the paired stores next to each other. - ; CHECK: t2STRi12 {{.*}}, 16 - ; CHECK-NEXT: t2STRi12 {{.*}}, 20 + ; CHECK: t2STRDi8 %12, %13, %0, 16 tBX_RET 14, $noreg Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -165,74 +165,74 @@ ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: wls lr, r1, .LBB2_3 ; CHECK-NEXT: @ %bb.1: @ %while.body.preheader -; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: add.w r10, r3, #4 -; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: mvn r9, #1 -; CHECK-NEXT: @ implicit-def: $r8 +; CHECK-NEXT: adds r6, r3, #4 +; CHECK-NEXT: adds r1, r0, #4 +; CHECK-NEXT: mvn r8, #1 +; CHECK-NEXT: @ implicit-def: $r9 ; CHECK-NEXT: @ implicit-def: $r4 ; CHECK-NEXT: str r2, [sp] @ 4-byte Spill ; CHECK-NEXT: .LBB2_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r2, [r0] -; CHECK-NEXT: asrs r5, r4, #31 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: muls r2, r3, r2 -; CHECK-NEXT: adds r4, r4, r2 -; CHECK-NEXT: adc.w r2, r5, r2, asr #31 -; CHECK-NEXT: ldr.w r5, [r9, #4] -; CHECK-NEXT: adds.w r4, r4, #-2147483648 -; CHECK-NEXT: adc r1, r2, #0 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: smull r5, r6, r5, r8 -; CHECK-NEXT: ldr.w r2, [r9] -; CHECK-NEXT: asrs r4, r1, #31 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: asrs r2, r4, #31 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [r1] +; CHECK-NEXT: muls r1, r3, r1 +; CHECK-NEXT: adds r4, r4, r1 +; CHECK-NEXT: adc.w r1, r2, r1, asr #31 +; CHECK-NEXT: adds.w r2, r4, #-2147483648 +; CHECK-NEXT: ldrd r2, r4, [r8] +; CHECK-NEXT: adc r5, r1, #0 ; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: subs r5, r1, r5 -; CHECK-NEXT: sbcs r4, r6 -; CHECK-NEXT: adds.w r6, r5, #-2147483648 -; CHECK-NEXT: adc r5, r4, #0 -; CHECK-NEXT: ldr r4, [r0, #-4] +; CHECK-NEXT: smull r4, r2, r4, r9 +; CHECK-NEXT: asrs r1, r5, #31 +; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: sbcs r1, r2 +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adds.w r10, r4, #-2147483648 +; CHECK-NEXT: adc r1, r1, #0 +; CHECK-NEXT: ldr r4, [r2, #-4] ; CHECK-NEXT: muls r4, r3, r4 ; CHECK-NEXT: adds r3, #4 -; CHECK-NEXT: adds.w r0, r4, #-2147483648 -; CHECK-NEXT: asr.w r1, r4, #31 -; CHECK-NEXT: ldr.w r4, [r10] -; CHECK-NEXT: adc r1, r1, #0 -; CHECK-NEXT: mul r2, r4, r12 -; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: adds.w r12, r4, #-2147483648 +; CHECK-NEXT: asr.w r5, r4, #31 +; CHECK-NEXT: ldr r4, [r6] +; CHECK-NEXT: adc r5, r5, #0 +; CHECK-NEXT: mul r2, r4, r0 +; CHECK-NEXT: adds r0, #4 ; CHECK-NEXT: add.w r2, r2, #-2147483648 -; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: asrl r12, r5, r2 +; CHECK-NEXT: smull r2, r5, r4, r12 +; CHECK-NEXT: lsll r2, r5, #30 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: asr.w r11, r5, #31 +; CHECK-NEXT: mov r12, r5 +; CHECK-NEXT: lsll r12, r11, r4 +; CHECK-NEXT: mul r2, r2, r9 +; CHECK-NEXT: lsrl r12, r11, #2 +; CHECK-NEXT: adds r2, #2 +; CHECK-NEXT: lsll r12, r11, r2 ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload -; CHECK-NEXT: smull r0, r1, r4, r0 -; CHECK-NEXT: lsll r0, r1, #30 -; CHECK-NEXT: asr.w r11, r1, #31 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: lsll r0, r11, r4 -; CHECK-NEXT: lsrl r0, r11, #2 -; CHECK-NEXT: mul r1, r1, r8 -; CHECK-NEXT: adds r1, #2 -; CHECK-NEXT: lsll r0, r11, r1 -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: add.w r0, r0, #-2147483648 -; CHECK-NEXT: asrl r6, r5, r0 -; CHECK-NEXT: movs r0, #2 -; CHECK-NEXT: lsrl r6, r5, #2 -; CHECK-NEXT: str r6, [r0] -; CHECK-NEXT: mov r8, r6 -; CHECK-NEXT: ldr r0, [r9], #-4 -; CHECK-NEXT: mls r0, r0, r4, r1 -; CHECK-NEXT: adds.w r4, r0, #-2147483648 -; CHECK-NEXT: asr.w r1, r0, #31 +; CHECK-NEXT: add.w r5, r12, #-2147483648 +; CHECK-NEXT: asrl r10, r1, r5 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: lsrl r10, r1, #2 +; CHECK-NEXT: movs r1, #2 +; CHECK-NEXT: mov r9, r10 +; CHECK-NEXT: str.w r10, [r1] +; CHECK-NEXT: ldr r1, [r8], #-4 +; CHECK-NEXT: mls r5, r1, r4, r5 +; CHECK-NEXT: adds.w r4, r5, #-2147483648 +; CHECK-NEXT: asr.w r1, r5, #31 ; CHECK-NEXT: adc r1, r1, #0 ; CHECK-NEXT: lsrl r4, r1, #2 -; CHECK-NEXT: rsbs r0, r4, #0 -; CHECK-NEXT: str r0, [r2] -; CHECK-NEXT: str r0, [r10, #-4] -; CHECK-NEXT: add.w r10, r10, #4 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: rsbs r1, r4, #0 +; CHECK-NEXT: str r1, [r2] +; CHECK-NEXT: str r1, [r6, #-4] +; CHECK-NEXT: adds r6, #4 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adds r1, #4 ; CHECK-NEXT: le lr, .LBB2_2 ; CHECK-NEXT: .LBB2_3: @ %while.end ; CHECK-NEXT: add sp, #16 Index: llvm/test/CodeGen/Thumb2/mve-float16regloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1350,7 +1350,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: ldrd r12, r6, [r0, #4] +; CHECK-NEXT: ldrd r6, r12, [r0, #4] ; CHECK-NEXT: ldrb.w r9, [r0] ; CHECK-NEXT: vldr.16 s0, .LCPI17_0 ; CHECK-NEXT: lsr.w r8, r3, #1 @@ -1358,26 +1358,26 @@ ; CHECK-NEXT: .LBB17_1: @ %if.else ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 ; CHECK-NEXT: vmovx.f16 s5, s4 -; CHECK-NEXT: vstr.16 s4, [r12] +; CHECK-NEXT: vstr.16 s4, [r6] ; CHECK-NEXT: .LBB17_2: @ %if.end ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 -; CHECK-NEXT: vstr.16 s5, [r12, #2] -; CHECK-NEXT: adds r6, #10 +; CHECK-NEXT: vstr.16 s5, [r6, #2] +; CHECK-NEXT: add.w r12, r12, #10 ; CHECK-NEXT: subs.w r9, r9, #1 -; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: add.w r6, r6, #4 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: beq .LBB17_8 ; CHECK-NEXT: .LBB17_3: @ %do.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB17_5 Depth 2 -; CHECK-NEXT: vldrh.u16 q2, [r6] +; CHECK-NEXT: vldrh.u16 q2, [r12] ; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: vmov q4, q2 ; CHECK-NEXT: vshlc q4, r5, #16 -; CHECK-NEXT: vldrh.u16 q3, [r6, #4] +; CHECK-NEXT: vldrh.u16 q3, [r12, #4] ; CHECK-NEXT: vmov q5, q3 ; CHECK-NEXT: vshlc q5, r5, #16 -; CHECK-NEXT: vldrh.u16 q1, [r12] +; CHECK-NEXT: vldrh.u16 q1, [r6] ; CHECK-NEXT: vmov.f32 s5, s1 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: wls lr, r8, .LBB17_6 @@ -1414,7 +1414,7 @@ ; CHECK-NEXT: vfma.f16 q1, q3, r0 ; CHECK-NEXT: strh r0, [r5] ; CHECK-NEXT: vmovx.f16 s2, s4 -; CHECK-NEXT: vstr.16 s2, [r12] +; CHECK-NEXT: vstr.16 s2, [r6] ; CHECK-NEXT: b .LBB17_2 ; CHECK-NEXT: .LBB17_8: @ %do.end ; CHECK-NEXT: vpop {d8, d9, d10, d11} Index: llvm/test/CodeGen/Thumb2/mve-float32regloops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1331,7 +1331,7 @@ ; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: ldrb.w r12, [r0] -; CHECK-NEXT: ldrd r3, r0, [r0, #4] +; CHECK-NEXT: ldrd r0, r3, [r0, #4] ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: cmp.w r8, #0 ; CHECK-NEXT: strd r4, r4, [sp, #16] @@ -1343,13 +1343,13 @@ ; CHECK-NEXT: .LBB17_2: @ %bb29 ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB17_3 Depth 2 -; CHECK-NEXT: ldrd r5, r7, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r3] -; CHECK-NEXT: ldr r6, [r0, #12] -; CHECK-NEXT: vldr s8, [r0, #8] +; CHECK-NEXT: ldrd r5, r7, [r3] +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: ldr r6, [r3, #12] +; CHECK-NEXT: vldr s8, [r3, #8] ; CHECK-NEXT: vstrw.32 q1, [r4] ; CHECK-NEXT: vdup.32 q1, r7 -; CHECK-NEXT: vldr s12, [r0, #16] +; CHECK-NEXT: vldr s12, [r3, #16] ; CHECK-NEXT: vmov.f32 s6, s8 ; CHECK-NEXT: dls lr, r8 ; CHECK-NEXT: vmov.f32 s7, s8 @@ -1373,18 +1373,18 @@ ; CHECK-NEXT: @ %bb.4: @ %bb75 ; CHECK-NEXT: @ in Loop: Header=BB17_2 Depth=1 ; CHECK-NEXT: subs.w r12, r12, #1 -; CHECK-NEXT: add.w r0, r0, #20 -; CHECK-NEXT: vstrb.8 q3, [r3], #16 +; CHECK-NEXT: add.w r3, r3, #20 +; CHECK-NEXT: vstrb.8 q3, [r0], #16 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: bne .LBB17_2 ; CHECK-NEXT: b .LBB17_7 ; CHECK-NEXT: .LBB17_5: @ %bb21.preheader ; CHECK-NEXT: dls lr, r12 -; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: mov r1, sp ; CHECK-NEXT: .LBB17_6: @ %bb21 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r3], #16 -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 +; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: le lr, .LBB17_6 ; CHECK-NEXT: .LBB17_7: @ %bb80 ; CHECK-NEXT: add sp, #24 @@ -1918,7 +1918,7 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: ldrd r12, r6, [r0, #4] +; CHECK-NEXT: ldrd r6, r12, [r0, #4] ; CHECK-NEXT: lsr.w r8, r3, #1 ; CHECK-NEXT: ldrb r0, [r0] ; CHECK-NEXT: vldr s0, .LCPI20_0 @@ -1926,26 +1926,26 @@ ; CHECK-NEXT: .LBB20_1: @ %if.else ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 ; CHECK-NEXT: vmov.f32 s6, s5 -; CHECK-NEXT: vstr s4, [r12] +; CHECK-NEXT: vstr s4, [r6] ; CHECK-NEXT: .LBB20_2: @ %if.end ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 -; CHECK-NEXT: vstr s6, [r12, #4] -; CHECK-NEXT: adds r6, #20 +; CHECK-NEXT: vstr s6, [r6, #4] +; CHECK-NEXT: add.w r12, r12, #20 ; CHECK-NEXT: subs r0, #1 -; CHECK-NEXT: add.w r12, r12, #8 +; CHECK-NEXT: add.w r6, r6, #8 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: beq .LBB20_8 ; CHECK-NEXT: .LBB20_3: @ %do.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB20_5 Depth 2 -; CHECK-NEXT: vldrw.u32 q3, [r6] +; CHECK-NEXT: vldrw.u32 q3, [r12] ; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: vmov q4, q3 ; CHECK-NEXT: vshlc q4, r5, #32 -; CHECK-NEXT: vldrw.u32 q2, [r6, #8] +; CHECK-NEXT: vldrw.u32 q2, [r12, #8] ; CHECK-NEXT: vmov q5, q2 ; CHECK-NEXT: vshlc q5, r5, #32 -; CHECK-NEXT: vldrw.u32 q1, [r12] +; CHECK-NEXT: vldrw.u32 q1, [r6] ; CHECK-NEXT: vmov.f32 s6, s0 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: vmov.f32 s7, s0 @@ -1985,7 +1985,7 @@ ; CHECK-NEXT: vmov r1, s4 ; CHECK-NEXT: vstr s4, [r5] ; CHECK-NEXT: vfma.f32 q1, q2, r1 -; CHECK-NEXT: vstr s5, [r12] +; CHECK-NEXT: vstr s5, [r6] ; CHECK-NEXT: b .LBB20_2 ; CHECK-NEXT: .LBB20_8: @ %do.end ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} Index: llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll @@ -6,10 +6,10 @@ define arm_aapcs_vfpcc <2 x i32> @ptr_v2i32(ptr %offptr) { ; CHECK-LABEL: ptr_v2i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldrd r1, r0, [r0] -; CHECK-NEXT: ldr r0, [r0] +; CHECK-NEXT: ldrd r0, r1, [r0] ; CHECK-NEXT: ldr r1, [r1] -; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: ldr r0, [r0] +; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 ; CHECK-NEXT: bx lr entry: %offs = load <2 x ptr>, ptr %offptr, align 4 @@ -112,9 +112,9 @@ define arm_aapcs_vfpcc <2 x float> @ptr_v2f32(ptr %offptr) { ; CHECK-LABEL: ptr_v2f32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldrd r1, r0, [r0] -; CHECK-NEXT: vldr s1, [r0] -; CHECK-NEXT: vldr s0, [r1] +; CHECK-NEXT: ldrd r0, r1, [r0] +; CHECK-NEXT: vldr s1, [r1] +; CHECK-NEXT: vldr s0, [r0] ; CHECK-NEXT: bx lr entry: %offs = load <2 x ptr>, ptr %offptr, align 4 @@ -199,13 +199,13 @@ define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_sext(ptr %offptr) { ; CHECK-LABEL: ptr_v2i16_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldrd r1, r0, [r0] -; CHECK-NEXT: ldrsh.w r0, [r0] +; CHECK-NEXT: ldrd r0, r1, [r0] ; CHECK-NEXT: ldrsh.w r1, [r1] -; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: ldrsh.w r0, [r0] +; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 ; CHECK-NEXT: asrs r1, r1, #31 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r1 ; CHECK-NEXT: bx lr entry: %offs = load <2 x ptr>, ptr %offptr, align 4 @@ -217,11 +217,11 @@ define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_zext(ptr %offptr) { ; CHECK-LABEL: ptr_v2i16_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldrd r1, r0, [r0] +; CHECK-NEXT: ldrd r0, r1, [r0] ; CHECK-NEXT: vmov.i64 q0, #0xffff -; CHECK-NEXT: ldrh r0, [r0] ; CHECK-NEXT: ldrh r1, [r1] -; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: ldrh r0, [r0] +; CHECK-NEXT: vmov q1[2], q1[0], r0, r1 ; CHECK-NEXT: vand q0, q1, q0 ; CHECK-NEXT: bx lr entry: Index: llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -94,22 +94,22 @@ ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: ldrd r12, lr, [r1] -; CHECK-LE-NEXT: movs r3, #0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: @ implicit-def: $q1 -; CHECK-LE-NEXT: rsbs.w r1, r12, #0 +; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: vmov q0[2], q0[0], r12, lr -; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: csetm r1, lt +; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31 +; CHECK-LE-NEXT: csetm r3, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 -; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 -; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: csetm r1, lt -; CHECK-LE-NEXT: bfi r3, r1, #1, #1 -; CHECK-LE-NEXT: lsls r1, r3, #31 +; CHECK-LE-NEXT: sbcs.w r4, r1, lr, asr #31 +; CHECK-LE-NEXT: bfi r1, r3, #0, #1 +; CHECK-LE-NEXT: csetm r3, lt +; CHECK-LE-NEXT: bfi r1, r3, #1, #1 +; CHECK-LE-NEXT: lsls r3, r1, #31 ; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrne r1, [r2] -; CHECK-LE-NEXT: vmovne.32 q1[0], r1 -; CHECK-LE-NEXT: lsls r1, r3, #30 +; CHECK-LE-NEXT: ldrne r3, [r2] +; CHECK-LE-NEXT: vmovne.32 q1[0], r3 +; CHECK-LE-NEXT: lsls r1, r1, #30 ; CHECK-LE-NEXT: itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] ; CHECK-LE-NEXT: vmovmi.32 q1[2], r1 @@ -218,22 +218,22 @@ ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: ldrd r12, lr, [r1] -; CHECK-LE-NEXT: movs r3, #0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: rsbs.w r1, r12, #0 +; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr -; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: csetm r1, lt +; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31 +; CHECK-LE-NEXT: csetm r3, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 -; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 -; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: csetm r1, lt -; CHECK-LE-NEXT: bfi r3, r1, #1, #1 -; CHECK-LE-NEXT: lsls r1, r3, #31 +; CHECK-LE-NEXT: sbcs.w r4, r1, lr, asr #31 +; CHECK-LE-NEXT: bfi r1, r3, #0, #1 +; CHECK-LE-NEXT: csetm r3, lt +; CHECK-LE-NEXT: bfi r1, r3, #1, #1 +; CHECK-LE-NEXT: lsls r3, r1, #31 ; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrne r1, [r2] -; CHECK-LE-NEXT: vmovne.32 q0[0], r1 -; CHECK-LE-NEXT: lsls r1, r3, #30 +; CHECK-LE-NEXT: ldrne r3, [r2] +; CHECK-LE-NEXT: vmovne.32 q0[0], r3 +; CHECK-LE-NEXT: lsls r1, r1, #30 ; CHECK-LE-NEXT: itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] ; CHECK-LE-NEXT: vmovmi.32 q0[2], r1 @@ -346,23 +346,23 @@ ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: ldrd r12, lr, [r1] -; CHECK-LE-NEXT: movs r3, #0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: @ implicit-def: $q0 ; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff -; CHECK-LE-NEXT: rsbs.w r1, r12, #0 +; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr -; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: csetm r1, lt +; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31 +; CHECK-LE-NEXT: csetm r3, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 -; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 -; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: csetm r1, lt -; CHECK-LE-NEXT: bfi r3, r1, #1, #1 -; CHECK-LE-NEXT: lsls r1, r3, #31 +; CHECK-LE-NEXT: sbcs.w r4, r1, lr, asr #31 +; CHECK-LE-NEXT: bfi r1, r3, #0, #1 +; CHECK-LE-NEXT: csetm r3, lt +; CHECK-LE-NEXT: bfi r1, r3, #1, #1 +; CHECK-LE-NEXT: lsls r3, r1, #31 ; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrne r1, [r2] -; CHECK-LE-NEXT: vmovne.32 q0[0], r1 -; CHECK-LE-NEXT: lsls r1, r3, #30 +; CHECK-LE-NEXT: ldrne r3, [r2] +; CHECK-LE-NEXT: vmovne.32 q0[0], r3 +; CHECK-LE-NEXT: lsls r1, r1, #30 ; CHECK-LE-NEXT: itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] ; CHECK-LE-NEXT: vmovmi.32 q0[2], r1 @@ -460,23 +460,23 @@ ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: ldrd r12, lr, [r1] -; CHECK-LE-NEXT: movs r3, #0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: @ implicit-def: $q0 ; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff -; CHECK-LE-NEXT: rsbs.w r1, r12, #0 +; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr -; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: csetm r1, lt +; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31 +; CHECK-LE-NEXT: csetm r3, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 -; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 -; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: csetm r1, lt -; CHECK-LE-NEXT: bfi r3, r1, #1, #1 -; CHECK-LE-NEXT: lsls r1, r3, #31 +; CHECK-LE-NEXT: sbcs.w r4, r1, lr, asr #31 +; CHECK-LE-NEXT: bfi r1, r3, #0, #1 +; CHECK-LE-NEXT: csetm r3, lt +; CHECK-LE-NEXT: bfi r1, r3, #1, #1 +; CHECK-LE-NEXT: lsls r3, r1, #31 ; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrne r1, [r2] -; CHECK-LE-NEXT: vmovne.32 q0[0], r1 -; CHECK-LE-NEXT: lsls r1, r3, #30 +; CHECK-LE-NEXT: ldrne r3, [r2] +; CHECK-LE-NEXT: vmovne.32 q0[0], r3 +; CHECK-LE-NEXT: lsls r1, r1, #30 ; CHECK-LE-NEXT: itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] ; CHECK-LE-NEXT: vmovmi.32 q0[2], r1 Index: llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -6,102 +6,101 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #12 -; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB0_8 ; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: mov r11, r2 ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: bne .LBB0_3 ; CHECK-NEXT: @ %bb.2: -; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r10, r11 ; CHECK-NEXT: b .LBB0_6 ; CHECK-NEXT: .LBB0_3: @ %vector.ph -; CHECK-NEXT: bic r5, r3, #1 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: subs r7, r5, #2 -; CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: add.w r3, r1, r5, lsl #2 +; CHECK-NEXT: bic r2, r3, #1 ; CHECK-NEXT: adr r4, .LCPI0_0 +; CHECK-NEXT: subs r7, r2, #2 +; CHECK-NEXT: movs r6, #1 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w r10, r11, r2, lsl #2 ; CHECK-NEXT: add.w lr, r6, r7, lsr #1 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: str r5, [sp] @ 4-byte Spill -; CHECK-NEXT: add.w r10, r2, r5, lsl #2 -; CHECK-NEXT: add.w r12, r0, r5, lsl #2 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: add.w r8, r1, r2, lsl #2 +; CHECK-NEXT: add.w r12, r0, r2, lsl #2 ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: vmvn.i32 q1, #0x80000000 ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrd r4, r6, [r0], #8 +; CHECK-NEXT: ldrd r4, r2, [r0], #8 ; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: ldrd r7, r8, [r1], #8 -; CHECK-NEXT: smull r4, r11, r7, r4 -; CHECK-NEXT: asrl r4, r11, #31 +; CHECK-NEXT: ldrd r7, r6, [r1], #8 +; CHECK-NEXT: smull r4, r7, r7, r4 +; CHECK-NEXT: asrl r4, r7, #31 ; CHECK-NEXT: rsbs.w r9, r4, #-2147483648 ; CHECK-NEXT: mov.w r9, #-1 -; CHECK-NEXT: sbcs.w r3, r9, r11 +; CHECK-NEXT: sbcs.w r3, r9, r7 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: smull r6, r3, r8, r6 -; CHECK-NEXT: asrl r6, r3, #31 -; CHECK-NEXT: rsbs.w r7, r6, #-2147483648 -; CHECK-NEXT: vmov q2[2], q2[0], r4, r6 -; CHECK-NEXT: sbcs.w r7, r9, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r11, r3 -; CHECK-NEXT: csetm r7, lt -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: bfi r5, r7, #8, #8 +; CHECK-NEXT: smull r2, r3, r6, r2 +; CHECK-NEXT: asrl r2, r3, #31 +; CHECK-NEXT: rsbs.w r6, r2, #-2147483648 +; CHECK-NEXT: vmov q2[2], q2[0], r4, r2 +; CHECK-NEXT: sbcs.w r6, r9, r3 +; CHECK-NEXT: vmov q2[3], q2[1], r7, r3 +; CHECK-NEXT: csetm r6, lt +; CHECK-NEXT: bfi r5, r6, #8, #8 ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: mvn r5, #-2147483648 ; CHECK-NEXT: vpsel q2, q2, q0 -; CHECK-NEXT: vmov r3, r4, d4 -; CHECK-NEXT: subs r3, r3, r6 -; CHECK-NEXT: sbcs r3, r4, #0 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: csetm r3, lt -; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, r5, d5 -; CHECK-NEXT: subs r3, r3, r6 -; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: csetm r3, lt -; CHECK-NEXT: bfi r4, r3, #8, #8 -; CHECK-NEXT: vmsr p0, r4 +; CHECK-NEXT: vmov r2, r3, d4 +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: bfi r3, r2, #0, #8 +; CHECK-NEXT: vmov r2, r4, d5 +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: sbcs r2, r4, #0 +; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: bfi r3, r2, #8, #8 +; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q2, q2, q1 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: strd r4, r3, [r2], #8 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: strd r3, r2, [r11], #8 ; CHECK-NEXT: le lr, .LBB0_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block -; CHECK-NEXT: ldrd r7, r3, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp r7, r3 +; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: cmp r2, r3 ; CHECK-NEXT: beq .LBB0_8 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader -; CHECK-NEXT: sub.w lr, r3, r7 +; CHECK-NEXT: sub.w lr, r3, r2 ; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: mov.w r1, #-2147483648 -; CHECK-NEXT: mvn r2, #-2147483648 +; CHECK-NEXT: mvn r3, #-2147483648 ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r3, [r12], #4 -; CHECK-NEXT: ldr r4, [r6], #4 -; CHECK-NEXT: smull r4, r3, r4, r3 -; CHECK-NEXT: asrl r4, r3, #31 -; CHECK-NEXT: subs r5, r1, r4 -; CHECK-NEXT: sbcs.w r5, r0, r3 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r4, r4, r1, ne -; CHECK-NEXT: csel r3, r3, r0, ne -; CHECK-NEXT: subs r5, r4, r2 -; CHECK-NEXT: sbcs r3, r3, #0 -; CHECK-NEXT: csel r3, r4, r2, lt -; CHECK-NEXT: str r3, [r10], #4 +; CHECK-NEXT: ldr r2, [r12], #4 +; CHECK-NEXT: ldr r4, [r8], #4 +; CHECK-NEXT: smull r2, r5, r4, r2 +; CHECK-NEXT: asrl r2, r5, #31 +; CHECK-NEXT: subs r4, r1, r2 +; CHECK-NEXT: sbcs.w r4, r0, r5 +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r2, r2, r1, ne +; CHECK-NEXT: csel r4, r5, r0, ne +; CHECK-NEXT: subs r5, r2, r3 +; CHECK-NEXT: sbcs r4, r4, #0 +; CHECK-NEXT: csel r2, r2, r3, lt +; CHECK-NEXT: str r2, [r10], #4 ; CHECK-NEXT: le lr, .LBB0_7 ; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.9: @@ -613,20 +612,21 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq .LBB3_8 ; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: mov r8, r2 ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: bne .LBB3_3 ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: b .LBB3_6 ; CHECK-NEXT: .LBB3_3: @ %vector.ph ; CHECK-NEXT: bic r5, r3, #1 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: subs r7, r5, #2 ; CHECK-NEXT: str r5, [sp] @ 4-byte Spill -; CHECK-NEXT: add.w r8, r2, r5, lsl #2 +; CHECK-NEXT: add.w r2, r8, r5, lsl #2 ; CHECK-NEXT: add.w r11, r1, r5, lsl #2 ; CHECK-NEXT: add.w lr, r6, r7, lsr #1 ; CHECK-NEXT: add.w r12, r0, r5, lsl #2 @@ -653,7 +653,7 @@ ; CHECK-NEXT: vpsel q1, q1, q0 ; CHECK-NEXT: vmov r4, s6 ; CHECK-NEXT: vmov r5, s4 -; CHECK-NEXT: strd r5, r4, [r2], #8 +; CHECK-NEXT: strd r5, r4, [r8], #8 ; CHECK-NEXT: le lr, .LBB3_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload @@ -667,11 +667,11 @@ ; CHECK-NEXT: ldr r1, [r11], #4 ; CHECK-NEXT: umull r0, r1, r1, r0 ; CHECK-NEXT: lsrl r0, r1, #31 -; CHECK-NEXT: subs.w r2, r0, #-1 +; CHECK-NEXT: subs.w r3, r0, #-1 ; CHECK-NEXT: sbcs r1, r1, #0 ; CHECK-NEXT: it hs ; CHECK-NEXT: movhs.w r0, #-1 -; CHECK-NEXT: str r0, [r8], #4 +; CHECK-NEXT: str r0, [r2], #4 ; CHECK-NEXT: le lr, .LBB3_7 ; CHECK-NEXT: .LBB3_8: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #4 Index: llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll +++ llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll @@ -9,10 +9,10 @@ ; CHECK-LABEL: ptr_v2i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: ldrd r1, r0, [r0] -; CHECK-NEXT: str r2, [r1] -; CHECK-NEXT: vmov r1, s2 -; CHECK-NEXT: str r1, [r0] +; CHECK-NEXT: ldrd r0, r1, [r0] +; CHECK-NEXT: str r2, [r0] +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: str r0, [r1] ; CHECK-NEXT: bx lr entry: %offs = load <2 x ptr>, ptr %offptr, align 4 @@ -125,9 +125,9 @@ define arm_aapcs_vfpcc void @ptr_v2f32(<2 x float> %v, ptr %offptr) { ; CHECK-LABEL: ptr_v2f32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldrd r1, r0, [r0] -; CHECK-NEXT: vstr s0, [r1] -; CHECK-NEXT: vstr s1, [r0] +; CHECK-NEXT: ldrd r0, r1, [r0] +; CHECK-NEXT: vstr s0, [r0] +; CHECK-NEXT: vstr s1, [r1] ; CHECK-NEXT: bx lr entry: %offs = load <2 x ptr>, ptr %offptr, align 4 @@ -217,10 +217,10 @@ ; CHECK-LABEL: ptr_v2i16_trunc: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: ldrd r1, r0, [r0] -; CHECK-NEXT: strh r2, [r1] -; CHECK-NEXT: vmov r1, s2 -; CHECK-NEXT: strh r1, [r0] +; CHECK-NEXT: ldrd r0, r1, [r0] +; CHECK-NEXT: strh r2, [r0] +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: strh r0, [r1] ; CHECK-NEXT: bx lr entry: %offs = load <2 x ptr>, ptr %offptr, align 4 Index: llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll +++ llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll @@ -7,7 +7,7 @@ define i32 @addv2i32i32(ptr %x) { ; CHECK-LABEL: addv2i32i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldrd r1, r0, [r0] +; CHECK-NEXT: ldrd r0, r1, [r0] ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: bx lr entry: @@ -1308,10 +1308,10 @@ define i32 @mlav2i32i32(ptr %x, ptr %y) { ; CHECK-LABEL: mlav2i32i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldrd r2, r0, [r0] -; CHECK-NEXT: ldrd r3, r1, [r1] -; CHECK-NEXT: muls r2, r3, r2 -; CHECK-NEXT: mla r0, r1, r0, r2 +; CHECK-NEXT: ldrd r0, r2, [r0] +; CHECK-NEXT: ldrd r1, r3, [r1] +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: mla r0, r3, r2, r0 ; CHECK-NEXT: bx lr entry: %0 = load i32, ptr %x, align 4 Index: llvm/test/CodeGen/Thumb2/mve-vld3.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vld3.ll +++ llvm/test/CodeGen/Thumb2/mve-vld3.ll @@ -9,11 +9,11 @@ ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: ldrd r2, r0, [r0, #16] +; CHECK-NEXT: ldrd r0, r2, [r0, #16] ; CHECK-NEXT: vmov.f32 s6, s3 ; CHECK-NEXT: vmov r12, lr, d0 ; CHECK-NEXT: vmov r3, s6 -; CHECK-NEXT: add r2, r3 +; CHECK-NEXT: add r0, r3 ; CHECK-NEXT: add.w r3, r12, lr ; CHECK-NEXT: add r0, r2 ; CHECK-NEXT: vmov r2, s2 @@ -438,8 +438,8 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: ldrd r2, r0, [r0] -; CHECK-NEXT: strd r2, r0, [sp] +; CHECK-NEXT: ldrd r0, r2, [r0] +; CHECK-NEXT: strd r0, r2, [sp] ; CHECK-NEXT: mov r0, sp ; CHECK-NEXT: vldrb.u16 q0, [r0] ; CHECK-NEXT: vmov.u16 r0, q0[4] Index: llvm/test/CodeGen/Thumb2/mve-vst2.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vst2.ll +++ llvm/test/CodeGen/Thumb2/mve-vst2.ll @@ -480,9 +480,9 @@ define void @vst2_v2f16(ptr %src, ptr %dst) { ; CHECK-LABEL: vst2_v2f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldrd r2, r0, [r0] -; CHECK-NEXT: vmov.32 q1[0], r2 -; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: ldrd r0, r2, [r0] +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.32 q0[0], r2 ; CHECK-NEXT: vmovx.f16 s5, s4 ; CHECK-NEXT: vins.f16 s4, s0 ; CHECK-NEXT: vmovx.f16 s0, s0 Index: llvm/test/CodeGen/Thumb2/postinc-distribute.mir =================================================================== --- llvm/test/CodeGen/Thumb2/postinc-distribute.mir +++ llvm/test/CodeGen/Thumb2/postinc-distribute.mir @@ -366,8 +366,7 @@ ; CHECK: liveins: $r0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK-NEXT: [[t2LDRi12_:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32)) - ; CHECK-NEXT: [[t2LDRi12_1:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load (s32)) + ; CHECK-NEXT: [[t2LDRDi8_:%[0-9]+]]:rgpr, [[t2LDRDi8_1:%[0-9]+]]:rgpr = t2LDRDi8 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK-NEXT: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK-NEXT: [[t2LDRi8_:%[0-9]+]]:rgpr = t2LDRi8 [[COPY]], -8, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK-NEXT: $r0 = COPY [[t2ADDri]] @@ -575,8 +574,7 @@ ; CHECK: liveins: $r0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK-NEXT: [[t2LDRi12_:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32)) - ; CHECK-NEXT: [[t2LDRi12_1:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load (s32)) + ; CHECK-NEXT: [[t2LDRDi8_:%[0-9]+]]:rgpr, [[t2LDRDi8_1:%[0-9]+]]:rgpr = t2LDRDi8 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK-NEXT: [[t2SUBri:%[0-9]+]]:rgpr = nuw t2SUBri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK-NEXT: [[t2LDRi8_:%[0-9]+]]:rgpr = t2LDRi8 [[COPY]], -8, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK-NEXT: $r0 = COPY [[t2SUBri]]