Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -1140,8 +1140,54 @@
 
   // If the instruction is already explicitly predicated, then the conversion
   // will be fine, but ensure that all store operations are predicated.
-  if (MI->mayStore())
-    return IsUse;
+  // An unpredicated vector register spill is allowed if all of the uses of the
+  // stack slot are within the loop.
+  if (MI->mayStore()) {
+    if (MI->getOpcode() != ARM::MVE_VSTRWU32 ||
+        MI->getOperand(1).getReg() != ARM::SP)
+      return IsUse;
+
+    // Find uses of the same stack slot. ReachingDefAnalysis doesn't work for sp
+    // as it relies on registers being live-out (which sp never is) to know what
+    // blocks to look in.
+    // NOTE(review): only MVE_VSTRWU32/MVE_VLDRWU32 with an identical sp offset
+    // are recognised below; other or overlapping accesses to the slot are not
+    // detected -- confirm that is sufficient.
+    const int64_t StackSlot = MI->getOperand(2).getImm();
+    auto AccessesSlot = [StackSlot](const MachineInstr &I, unsigned Opcode) {
+      return I.getOpcode() == Opcode && I.getOperand(1).getReg() == ARM::SP &&
+             I.getOperand(2).getImm() == StackSlot;
+    };
+
+    // Scan every block reachable from the loop exits. The block holding the
+    // store is pre-seeded into Visited so the walk never enters it.
+    SmallVector<MachineBasicBlock *, 4> Frontier;
+    ML.getExitBlocks(Frontier);
+    SmallPtrSet<MachineBasicBlock *, 4> Visited{MI->getParent()};
+    for (unsigned Idx = 0; Idx < Frontier.size(); ++Idx) {
+      MachineBasicBlock *BB = Frontier[Idx];
+      // A block may have been queued by several predecessors; scan it once.
+      if (!Visited.insert(BB).second)
+        continue;
+
+      for (const MachineInstr &I : *BB) {
+        // If this block has a store to the stack slot before any loads then
+        // we can ignore the block.
+        if (AccessesSlot(I, ARM::MVE_VSTRWU32))
+          break;
+        // If the store and the load are using the same stack slot then the
+        // store isn't valid for tail predication.
+        if (AccessesSlot(I, ARM::MVE_VLDRWU32))
+          return false;
+      }
+
+      for (MachineBasicBlock *Succ : BB->successors())
+        if (!Visited.contains(Succ))
+          Frontier.push_back(Succ);
+    }
+
+    return true;
+  }
 
   // If this instruction defines the VPR, update the predicate for the
   // proceeding instructions.
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector_spill_in_loop.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector_spill_in_loop.mir @@ -0,0 +1,161 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + + +--- | + define hidden void @vector_spill_in_loop() { + entry: + ret void + } + + define hidden void @vector_spill_load_outside() { + entry: + ret void + } +... +--- +name: vector_spill_in_loop +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: vector_spill_in_loop + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r3, $r4, $r5, $r7, $r8, $r10, $r11, $r12 + ; CHECK: $r6 = tMOVr $r2, 14 /* CC::al */, $noreg + ; CHECK: $r0 = tMOVr $r12, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_16 renamable $r3 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r10, $r11, $r12 + ; CHECK: renamable $r0, renamable $q6 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, $noreg + ; CHECK: renamable $q3 = MVE_VLDRHU16 renamable $r6, 0, 0, $noreg + ; CHECK: renamable $q5 = MVE_VSHR_immu16 killed renamable $q3, 11, 0, $noreg, undef renamable $q5 + ; CHECK: MVE_VSTRWU32 killed renamable $q5, $sp, 80, 0, $noreg + ; CHECK: dead renamable $q7 = MVE_VLDRWU32 $sp, 80, 0, $noreg + ; CHECK: dead renamable $vpr = MVE_VCMPi16r killed renamable $q6, renamable $r8, 1, 0, killed $noreg + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 + bb.0: + successors: %bb.1(0x80000000) + liveins: $q0, $r1, $r2, $r3, $r4, $r5, $r7, $r8, $r10, $r11, $r12 + + $r6 = tMOVr $r2, 14 /* CC::al */, $noreg + $r0 = tMOVr $r12, 14 /* CC::al */, $noreg + $r9 = tMOVr $r3, 14 /* CC::al */, $noreg + renamable $lr = t2DoLoopStartTP renamable $r1, 
renamable $r3 + + bb.1: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $r12 + + renamable $vpr = MVE_VCTP16 renamable $r9, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $r0, renamable $q6 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, renamable $vpr + renamable $q3 = MVE_VLDRHU16 renamable $r6, 0, 1, renamable $vpr + MVE_VPST 2, implicit $vpr + renamable $q5 = MVE_VSHR_immu16 renamable $q3, 11, 1, renamable $vpr, undef renamable $q5 + renamable $r9 = nsw t2SUBri killed renamable $r9, 8, 14 /* CC::al */, $noreg, $noreg + MVE_VSTRWU32 killed renamable $q5, $sp, 80, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $q7 = MVE_VLDRWU32 $sp, 80, 0, $noreg + MVE_VPST 1, implicit $vpr + renamable $vpr = MVE_VCMPi16r killed renamable $q6, renamable $r8, 1, 1, killed renamable $vpr + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2: + successors: %bb.3(0x04000000), %bb.0(0x7c000000) + liveins: $q0, $r1, $r2, $r3, $r4, $r5, $r7, $r8, $r10, $r11, $r12 + + renamable $r0 = tLDRspi $sp, 1, 14 /* CC::al */, $noreg + renamable $r10 = nuw t2ADDri killed renamable $r10, 1, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2ADDrs killed renamable $r12, killed renamable $r0, 10, 14 /* CC::al */, $noreg, $noreg + renamable $r0 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg + renamable $r2 = t2ADDrs killed renamable $r2, killed renamable $r0, 10, 14 /* CC::al */, $noreg, $noreg + renamable $r0 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg + tCMPhir renamable $r10, killed renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr + tBcc %bb.0, 1 /* CC::ne */, killed $cpsr + + bb.3: + $sp = frame-destroy tADDspi $sp, 24, 14 /* CC::al */, $noreg + $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d8, def $d9, def $d10, def $d11, def $d12, def $d13, def $d14, def $d15 + $sp = frame-destroy tADDspi $sp, 1, 14 /* 
CC::al */, $noreg + $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + +... +--- +name: vector_spill_load_outside +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: vector_spill_load_outside + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $q0, $r1, $r2, $r3, $r4, $r5, $r7, $r8, $r10, $r11, $r12 + ; CHECK: $r6 = tMOVr $r2, 14 /* CC::al */, $noreg + ; CHECK: $r0 = tMOVr $r12, 14 /* CC::al */, $noreg + ; CHECK: $r9 = tMOVr $r3, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2DLS renamable $r1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $r12 + ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r9, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r0, renamable $q6 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, renamable $vpr + ; CHECK: renamable $q3 = MVE_VLDRHU16 renamable $r6, 0, 1, renamable $vpr + ; CHECK: MVE_VPST 2, implicit $vpr + ; CHECK: renamable $q5 = MVE_VSHR_immu16 killed renamable $q3, 11, 1, renamable $vpr, undef renamable $q5 + ; CHECK: renamable $r9 = nsw t2SUBri killed renamable $r9, 8, 14 /* CC::al */, $noreg, $noreg + ; CHECK: MVE_VSTRWU32 killed renamable $q5, $sp, 80, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: dead renamable $q7 = MVE_VLDRWU32 $sp, 80, 0, $noreg + ; CHECK: MVE_VPST 1, implicit $vpr + ; CHECK: dead renamable $vpr = MVE_VCMPi16r killed renamable $q6, renamable $r8, 1, 1, killed renamable $vpr + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + bb.0: + successors: %bb.1(0x80000000) + liveins: $q0, $r1, $r2, $r3, $r4, $r5, $r7, $r8, $r10, $r11, $r12 + + $r6 = tMOVr $r2, 14 /* CC::al */, $noreg + $r0 = tMOVr $r12, 14 /* CC::al */, $noreg + $r9 = tMOVr $r3, 14 /* CC::al */, $noreg + renamable $lr = t2DoLoopStartTP renamable $r1, renamable $r3 + + bb.1: 
+ successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $r12 + + renamable $vpr = MVE_VCTP16 renamable $r9, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $r0, renamable $q6 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, renamable $vpr + renamable $q3 = MVE_VLDRHU16 renamable $r6, 0, 1, renamable $vpr + MVE_VPST 2, implicit $vpr + renamable $q5 = MVE_VSHR_immu16 renamable $q3, 11, 1, renamable $vpr, undef renamable $q5 + renamable $r9 = nsw t2SUBri killed renamable $r9, 8, 14 /* CC::al */, $noreg, $noreg + MVE_VSTRWU32 killed renamable $q5, $sp, 80, 0, $noreg + MVE_VPST 8, implicit $vpr + renamable $q7 = MVE_VLDRWU32 $sp, 80, 0, $noreg + MVE_VPST 1, implicit $vpr + renamable $vpr = MVE_VCMPi16r killed renamable $q6, renamable $r8, 1, 1, killed renamable $vpr + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2: + successors: %bb.3(0x04000000), %bb.0(0x7c000000) + liveins: $q0, $r1, $r2, $r3, $r4, $r5, $r7, $r8, $r10, $r11, $r12 + + renamable $q7 = MVE_VLDRWU32 $sp, 80, 0, $noreg + renamable $r0 = tLDRspi $sp, 1, 14 /* CC::al */, $noreg + renamable $r10 = nuw t2ADDri killed renamable $r10, 1, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2ADDrs killed renamable $r12, killed renamable $r0, 10, 14 /* CC::al */, $noreg, $noreg + renamable $r0 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg + renamable $r2 = t2ADDrs killed renamable $r2, killed renamable $r0, 10, 14 /* CC::al */, $noreg, $noreg + renamable $r0 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg + tCMPhir renamable $r10, killed renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr + tBcc %bb.0, 1 /* CC::ne */, killed $cpsr + + bb.3: + $sp = frame-destroy tADDspi $sp, 24, 14 /* CC::al */, $noreg + $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d8, def $d9, def $d10, def $d11, def $d12, def $d13, def $d14, def $d15 + $sp = 
frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + +...