diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp
--- a/llvm/lib/CodeGen/LiveRegUnits.cpp
+++ b/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -81,8 +81,17 @@
 static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
                                const MachineFunction &MF) {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
-    LiveUnits.addReg(*CSR);
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) {
+    const unsigned N = *CSR;
+
+    const auto &CSI = MFI.getCalleeSavedInfo();
+    auto Info =
+        llvm::find_if(CSI, [N](auto Info) { return Info.getReg() == N; });
+    // If we have no info for this callee-saved register, assume it is live-out.
+    if (Info == CSI.end() || Info->isRestored())
+      LiveUnits.addReg(N);
+  }
 }
 
 void LiveRegUnits::addPristines(const MachineFunction &MF) {
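The predicate this hunk introduces is small enough to state on its own. Below is a self-contained sketch of the same decision with a mocked-up stand-in for `CalleeSavedInfo` (the `SavedInfo` struct and the register encodings are hypothetical, not LLVM's types): a callee-saved register keeps counting as a pristine live-out unless frame info explicitly records it as saved but not restored, which is exactly the LR-reloaded-into-PC case the new test exercises.

```cpp
// Sketch only: SavedInfo mocks llvm::CalleeSavedInfo; register numbers are
// hypothetical encodings, not real ARM register enum values.
#include <algorithm>
#include <cstdio>
#include <vector>

struct SavedInfo {
  unsigned Reg;  // callee-saved register this entry describes
  bool Restored; // false when e.g. LR is reloaded straight into PC
};

// Mirrors the new addCalleeSavedRegs() decision: a callee-saved register is
// treated as a pristine live-out unless it is recorded as not restored.
static bool treatAsLiveOut(unsigned CSR, const std::vector<SavedInfo> &CSI) {
  auto It = std::find_if(CSI.begin(), CSI.end(),
                         [CSR](const SavedInfo &I) { return I.Reg == CSR; });
  // No info for this callee-saved register: conservatively assume live-out.
  return It == CSI.end() || It->Restored;
}

int main() {
  enum { R4 = 4, LR = 14 }; // hypothetical encodings for r4 and lr
  std::vector<SavedInfo> CSI = {
      {R4, true},  // r4 is popped on return, so it stays pristine
      {LR, false}, // lr is consumed by the pop into pc, so it is free
  };
  std::printf("r4 live-out: %d\n", treatAsLiveOut(R4, CSI)); // prints 1
  std::printf("lr live-out: %d\n", treatAsLiveOut(LR, CSI)); // prints 0
}
```

The conservative default (no CalleeSavedInfo entry means live-out) keeps behavior unchanged for functions whose frame lowering has not recorded save/restore information.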
diff --git a/llvm/test/CodeGen/ARM/scavenge-lr.mir b/llvm/test/CodeGen/ARM/scavenge-lr.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/scavenge-lr.mir
@@ -0,0 +1,221 @@
+# RUN: llc -mtriple=thumbv7-unknown-linux-android30 -run-pass=prologepilog -verify-machineinstrs %s -o - | FileCheck %s
+
+# When saving and restoring callee-saved registers, LR is saved but not restored,
+# because it is reloaded directly into PC. Therefore it should be available to scavenge
+# without requiring an emergency spill slot.
+
+# Used to result in
+# LLVM ERROR: Error while trying to spill LR from class GPR: Cannot scavenge register without an emergency spill slot!
+
+# Check that LR is considered live-in
+# CHECK: liveins: {{.*}}$lr
+
+# Check that LR is saved to the stack
+# CHECK: frame-setup t2STMDB_UPD {{.*}} killed $lr
+# CHECK: frame-setup CFI_INSTRUCTION offset $lr,
+
+# Check that LR was successfully scavenged somewhere in the function
+# CHECK: $lr = t2ADDri
+# CHECK: VSTMQIA $q11, killed $lr
+
+# Check that LR is not restored at the end of the function
+# CHECK-NOT: $lr = frame-destroy
+# CHECK-NOT: frame-destroy VLDMDIA_UPD {{.*}} def $lr
+# CHECK-NOT: frame-destroy t2LDMIA_RET {{.*}} def $lr
+# CHECK: frame-destroy t2LDMIA_RET {{.*}} def $pc
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+  %S = type { [32 x i8] }
+
+  define void @f(%S* %arg) {
+  entry:
+    %ppp..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -8
+    %ppp..sroa_cast248 = bitcast %S* %ppp..sroa_idx to <8 x float>*
+    %ppp.copyload = load <8 x float>, <8 x float>* %ppp..sroa_cast248, align 32
+
+    %xxx..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -5
+    %xxx..sroa_cast248 = bitcast %S* %xxx..sroa_idx to <8 x float>*
+    %xxx.copyload = load <8 x float>, <8 x float>* %xxx..sroa_cast248, align 32
+
+    %yyy..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -2
+    %yyy..sroa_cast244 = bitcast %S* %yyy..sroa_idx to <8 x float>*
+    %yyy.copyload = load <8 x float>, <8 x float>* %yyy..sroa_cast244, align 32
+
+    %zzz..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -7
+    %zzz..sroa_cast241 = bitcast %S* %zzz..sroa_idx to <8 x float>*
+    %zzz.copyload = load <8 x float>, <8 x float>* %zzz..sroa_cast241, align 32
+
+    %www..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -4
+    %www..sroa_cast238 = bitcast %S* %www..sroa_idx to <8 x float>*
+    %www.copyload = load <8 x float>, <8 x float>* %www..sroa_cast238, align 32
+
+    %uuu..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 1
+    %uuu..sroa_cast235 = bitcast %S* %uuu..sroa_idx to <8 x float>*
+    %uuu.copyload = load <8 x float>, <8 x float>* %uuu..sroa_cast235, align 32
+
+    %vvv..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -6
+    %vvv..sroa_cast230 = bitcast %S* %vvv..sroa_idx to <8 x float>*
+    %vvv.copyload = load <8 x float>, <8 x float>* %vvv..sroa_cast230, align 32
+
+    %ttt..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -3
+    %ttt..sroa_cast226 = bitcast %S* %ttt..sroa_idx to <8 x float>*
+    %ttt.copyload = load <8 x float>, <8 x float>* %ttt..sroa_cast226, align 32
+
+    %sss..sroa_cast223 = bitcast %S* %arg to <8 x float>*
+    %sss.copyload = load <8 x float>, <8 x float>* %sss..sroa_cast223, align 32
+
+    %mul.i = fmul <8 x float> %ppp.copyload, %www.copyload
+    %mul.i185 = fmul <8 x float> %xxx.copyload, %uuu.copyload
+    %mul.i179 = fmul <8 x float> %mul.i185, %vvv.copyload
+    %mul.i173 = fmul <8 x float> %mul.i179, %ttt.copyload
+    %mul.i167 = fmul <8 x float> %zzz.copyload, %mul.i173
+    %add.i = fadd <8 x float> %mul.i, %mul.i167
+    %div.i = fdiv <8 x float> zeroinitializer, %add.i
+    %mul.i153 = fmul <8 x float> %uuu.copyload, %div.i
+
+    store <8 x float> %mul.i153, <8 x float>* %ppp..sroa_cast248, align 32
+
+    %mul.i147 = fmul <8 x float> %uuu.copyload, %vvv.copyload
+    %mul.i141 = fmul <8 x float> %zzz.copyload, %sss.copyload
+    %mul.i135 = fmul <8 x float> %mul.i141, %div.i
+    %sub.i129 = fsub <8 x float> %mul.i147, %mul.i135
+
+    store <8 x float> %sub.i129, <8 x float>* %zzz..sroa_cast241, align 32
+    store <8 x float> %div.i, <8 x float>* %vvv..sroa_cast230, align 32
+    store <8 x float> %div.i, <8 x float>* %xxx..sroa_cast248, align 32
+
+    %mul.i123 = fmul <8 x float> %yyy.copyload, %vvv.copyload
+    %mul.i117 = fmul <8 x float> %mul.i123, %div.i
+    %sub.i111 = fsub <8 x float> %sss.copyload, %mul.i117
+    store <8 x float> %sub.i111, <8 x float>* %www..sroa_cast238, align 32
+
+    %mul.i105 = fmul <8 x float> %ppp.copyload, %ttt.copyload
+    %mul.i99 = fmul <8 x float> %mul.i105, %div.i
+    %sub.i93 = fsub <8 x float> %xxx.copyload, %mul.i99
+    store <8 x float> %sub.i93, <8 x float>* %ttt..sroa_cast226, align 32
+
+    %mul.i81 = fmul <8 x float> %yyy.copyload, %www.copyload
+    %mul.i75 = fmul <8 x float> %mul.i81, %div.i
+    %sub.i = fsub <8 x float> %mul.i185, %mul.i75
+    store <8 x float> %sub.i, <8 x float>* %yyy..sroa_cast244, align 32
+
+    ret void
+  }
+...
+---
+name: f
+alignment: 2
+tracksRegLiveness: true
+liveins:
+  - { reg: '$r0' }
+frameInfo:
+  maxAlignment: 16
+  maxCallFrameSize: 0
+stack:
+  - { id: 0, type: spill-slot, size: 16, alignment: 16 }
+  - { id: 1, type: spill-slot, size: 16, alignment: 16 }
+  - { id: 2, type: spill-slot, size: 16, alignment: 16 }
+  - { id: 3, type: spill-slot, size: 16, alignment: 16 }
+constants:
+  - id: 0
+    value: 'float 0.000000e+00'
+    alignment: 4
+machineFunctionInfo: {}
+body: |
+  bb.0.entry:
+    liveins: $r0
+    $r2 = t2SUBri $r0, 128, 14 /* CC::al */, $noreg, $noreg
+    $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238, align 32)
+    VSTMQIA $q8, %stack.0, 14 /* CC::al */, $noreg :: (store 16 into %stack.0)
+    $r12 = t2SUBri $r0, 256, 14 /* CC::al */, $noreg, $noreg
+    $q12 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248, align 32)
+    $q1 = VMULfq $q12, killed $q8, 14 /* CC::al */, $noreg
+    $r3 = nuw t2ADDri $r0, 32, 14 /* CC::al */, $noreg, $noreg
+    $q10 = VLD1q64 killed $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235, align 32)
+    $r5 = t2SUBri $r0, 160, 14 /* CC::al */, $noreg, $noreg
+    $q15 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248, align 32)
+    $q14 = VMULfq $q15, $q10, 14 /* CC::al */, $noreg
+    $r6 = t2SUBri $r0, 192, 14 /* CC::al */, $noreg, $noreg
+    $q13 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230, align 32)
+    $q8 = VMULfq $q14, $q13, 14 /* CC::al */, $noreg
+    $r4 = t2SUBri $r0, 96, 14 /* CC::al */, $noreg, $noreg
+    $q6 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226, align 32)
+    $q8 = VMULfq killed $q8, $q6, 14 /* CC::al */, $noreg
+    $r3 = t2SUBri $r0, 224, 14 /* CC::al */, $noreg, $noreg
+    $q5 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241, align 32)
+    $q1 = VMLAfq killed $q1, $q5, killed $q8, 14 /* CC::al */, $noreg
+    $s8 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
+    $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0
+    $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
+    $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
+    $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0
+    $r7 = t2SUBri $r0, 64, 14 /* CC::al */, $noreg, $noreg
+    $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244, align 32)
+    VSTMQIA $q8, %stack.1, 14 /* CC::al */, $noreg :: (store 16 into %stack.1)
+    $q8 = VMULfq killed $q8, $q13, 14 /* CC::al */, $noreg
+    $r1 = t2ADDri $r0, 48, 14 /* CC::al */, $noreg, $noreg
+    $q9, $r0 = VLD1q32wb_fixed killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223, align 32)
+    $q11 = COPY $q9
+    $q11 = VMLSfq killed $q11, killed $q8, $q0, 14 /* CC::al */, $noreg
+    $r2 = VST1q32wb_fixed killed $r2, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238, align 32)
+    $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238 + 16, basealign 32)
+    VSTMQIA $q8, %stack.3, 14 /* CC::al */, $noreg :: (store 16 into %stack.3)
+    $q11 = VMULfq $q10, $q0, 14 /* CC::al */, $noreg
+    $r12 = VST1q32wb_fixed killed $r12, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248, align 32)
+    $q11 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248 + 16, basealign 32)
+    VSTMQIA $q11, %stack.2, 14 /* CC::al */, $noreg :: (store 16 into %stack.2)
+    $q1 = VMULfq killed $q11, killed $q8, 14 /* CC::al */, $noreg
+    $r5 = VST1q32wb_fixed killed $r5, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248, align 32)
+    $q4 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248 + 16, basealign 32)
+    $q11 = VLD1q64 killed $r1, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235 + 16, basealign 32)
+    $q7 = VMULfq $q4, $q11, 14 /* CC::al */, $noreg
+    $r6 = VST1q32wb_fixed killed $r6, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230, align 32)
+    $q3 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230 + 16, basealign 32)
+    $q8 = VMULfq $q7, $q3, 14 /* CC::al */, $noreg
+    $q12 = VMULfq killed $q12, killed $q6, 14 /* CC::al */, $noreg
+    $q15 = VMLSfq killed $q15, killed $q12, $q0, 14 /* CC::al */, $noreg
+    $r4 = VST1q32wb_fixed killed $r4, 16, killed $q15, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226, align 32)
+    $q12 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226 + 16, basealign 32)
+    $q8 = VMULfq killed $q8, $q12, 14 /* CC::al */, $noreg
+    $q9 = VMULfq killed $q5, killed $q9, 14 /* CC::al */, $noreg
+    $q10 = VMULfq killed $q10, killed $q13, 14 /* CC::al */, $noreg
+    $q10 = VMLSfq killed $q10, killed $q9, $q0, 14 /* CC::al */, $noreg
+    $r3 = VST1q32wb_fixed killed $r3, 16, killed $q10, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241, align 32)
+    $q10 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241 + 16, basealign 32)
+    $q1 = VMLAfq killed $q1, $q10, killed $q8, 14 /* CC::al */, $noreg
+    $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5
+    $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
+    $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
+    $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5
+    VST1q64 killed $r5, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248 + 16, basealign 32)
+    VST1q64 killed $r6, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230 + 16, basealign 32)
+    $q8 = VLDMQIA %stack.0, 14 /* CC::al */, $noreg :: (load 16 from %stack.0)
+    $q9 = VLDMQIA %stack.1, 14 /* CC::al */, $noreg :: (load 16 from %stack.1)
+    $q8 = VMULfq killed $q9, killed $q8, 14 /* CC::al */, $noreg
+    $q14 = VMLSfq killed $q14, killed $q8, killed $q0, 14 /* CC::al */, $noreg
+    $r7 = VST1q32wb_fixed killed $r7, 16, killed $q14, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244, align 32)
+    $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244 + 16, basealign 32)
+    $q9 = VLDMQIA %stack.3, 14 /* CC::al */, $noreg :: (load 16 from %stack.3)
+    $q9 = VMULfq $q8, killed $q9, 14 /* CC::al */, $noreg
+    $q7 = VMLSfq killed $q7, killed $q9, $q5, 14 /* CC::al */, $noreg
+    VST1q64 killed $r7, 16, killed $q7, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244 + 16, basealign 32)
+    $q9 = VLDMQIA %stack.2, 14 /* CC::al */, $noreg :: (load 16 from %stack.2)
+    $q9 = VMULfq killed $q9, killed $q12, 14 /* CC::al */, $noreg
+    $q4 = VMLSfq killed $q4, killed $q9, $q5, 14 /* CC::al */, $noreg
+    VST1q64 killed $r4, 16, killed $q4, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226 + 16, basealign 32)
+    $q8 = VMULfq killed $q8, $q3, 14 /* CC::al */, $noreg
+    $q9 = VLD1q64 killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223 + 16, basealign 32)
+    $q12 = COPY $q9
+    $q12 = VMLSfq killed $q12, killed $q8, $q5, 14 /* CC::al */, $noreg
+    VST1q64 killed $r2, 16, killed $q12, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238 + 16, basealign 32)
+    $q8 = VMULfq $q11, killed $q3, 14 /* CC::al */, $noreg
+    $q9 = VMULfq killed $q10, killed $q9, 14 /* CC::al */, $noreg
+    $q8 = VMLSfq killed $q8, killed $q9, $q5, 14 /* CC::al */, $noreg
+    VST1q64 killed $r3, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241 + 16, basealign 32)
+    $q8 = VMULfq killed $q11, killed $q5, 14 /* CC::al */, $noreg
+    VST1q64 killed $r12, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248 + 16, basealign 32)
+    tBX_RET 14 /* CC::al */, $noreg
+
+...
diff --git a/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll b/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll
--- a/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-multivec-spill.ll
@@ -35,18 +35,18 @@
 ; CHECK-NEXT: vld21.32 {q4, q5}, [r0]
 ; CHECK-NEXT: bl external_function
 ; CHECK-NEXT: vldmia sp, {d2, d3, d4, d5} @ 32-byte Reload
-; CHECK-NEXT: add r0, sp, #32
+; CHECK-NEXT: add.w lr, sp, #32
 ; CHECK-NEXT: vstrw.32 q2, [r4, #80]
 ; CHECK-NEXT: vstrw.32 q5, [r4, #144]
 ; CHECK-NEXT: vstrw.32 q4, [r4, #128]
 ; CHECK-NEXT: vstrw.32 q7, [r4, #112]
 ; CHECK-NEXT: vstrw.32 q1, [r4, #64]
-; CHECK-NEXT: vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
-; CHECK-NEXT: add r0, sp, #64
+; CHECK-NEXT: vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
+; CHECK-NEXT: add.w lr, sp, #64
 ; CHECK-NEXT: vstrw.32 q2, [r4, #48]
 ; CHECK-NEXT: vstrw.32 q6, [r4, #96]
 ; CHECK-NEXT: vstrw.32 q1, [r5]
-; CHECK-NEXT: vldmia r0, {d2, d3, d4, d5} @ 32-byte Reload
+; CHECK-NEXT: vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
 ; CHECK-NEXT: vstrw.32 q2, [r4, #16]
 ; CHECK-NEXT: vstrw.32 q1, [r4]
 ; CHECK-NEXT: add sp, #112
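The mve-multivec-spill.ll update above shows the downstream effect: the scavenger can now hand out lr as the reload base register instead of tying up r0. A consumer observes this through LiveRegUnits roughly as sketched below; this is a hypothetical fragment assuming a machine-pass context (the helper name and the LR parameter, e.g. ARM::LR, are illustrative), where addLiveOuts() pulls in pristine registers through the patched addCalleeSavedRegs():

```cpp
// Hypothetical fragment, not part of this patch: with the change above, a
// saved-but-not-restored LR is no longer reported as a pristine live-out,
// so it becomes available at the end of the block.
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

static bool canScavengeAtEnd(const llvm::MachineBasicBlock &MBB,
                             const llvm::TargetRegisterInfo &TRI,
                             llvm::MCPhysReg Reg /* e.g. ARM::LR */) {
  llvm::LiveRegUnits Units(TRI);
  Units.addLiveOuts(MBB); // adds pristines via addCalleeSavedRegs()
  return Units.available(Reg);
}
```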