diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -85,12 +85,15 @@ const BitVector &ClearRegs); void CMSESaveClearFPRegs(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, + const LivePhysRegs &LiveRegs, SmallVectorImpl &AvailableRegs); void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, + const LivePhysRegs &LiveRegs, SmallVectorImpl &ScratchRegs); void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc &DL); + MachineBasicBlock::iterator MBBI, DebugLoc &DL, + const LivePhysRegs &LiveRegs); void CMSERestoreFPRegs(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, SmallVectorImpl &AvailableRegs); @@ -1202,17 +1205,17 @@ void ARMExpandPseudo::CMSESaveClearFPRegs( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, - SmallVectorImpl &ScratchRegs) { + const LivePhysRegs &LiveRegs, SmallVectorImpl &ScratchRegs) { if (STI->hasV8_1MMainlineOps()) - CMSESaveClearFPRegsV81(MBB, MBBI, DL); + CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs); else - CMSESaveClearFPRegsV8(MBB, MBBI, DL, ScratchRegs); + CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs); } // Save and clear FP registers if present void ARMExpandPseudo::CMSESaveClearFPRegsV8( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, - SmallVectorImpl &ScratchRegs) { + const LivePhysRegs &LiveRegs, SmallVectorImpl &ScratchRegs) { if (!STI->hasFPRegs()) return; @@ -1271,7 +1274,11 @@ // Lazy store all fp registers to the stack MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) .addReg(ARM::SP) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)); + for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1, + ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7}) + VLSTM.addReg(R, RegState::Implicit | + (LiveRegs.contains(R) ? 0 : RegState::Undef)); // Restore all arguments for (const auto &Regs : ClearedFPRegs) { @@ -1343,7 +1350,8 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc &DL) { + DebugLoc &DL, + const LivePhysRegs &LiveRegs) { BitVector ClearRegs(32, true); bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs); @@ -1358,9 +1366,13 @@ .add(predOps(ARMCC::AL)); // Lazy store all FP registers to the stack - BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) - .addReg(ARM::SP) - .add(predOps(ARMCC::AL)); + MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); + for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1, + ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7}) + VLSTM.addReg(R, RegState::Implicit | + (LiveRegs.contains(R) ? 0 : RegState::Undef)); } else { // Push all the callee-saved registers (s16-s31). MachineInstrBuilder VPUSH = @@ -1754,13 +1766,15 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int JumpReg, - bool Thumb1Only) { + const LivePhysRegs &LiveRegs, bool Thumb1Only) { const DebugLoc &DL = MBBI->getDebugLoc(); if (Thumb1Only) { // push Lo and Hi regs separately MachineInstrBuilder PushMIB = BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); - for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) - PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0); + for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) { + PushMIB.addReg( + Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef); + } // Thumb1 can only tPUSH low regs, so we copy the high regs to the low // regs that we just saved and push the low regs again, taking care to @@ -1773,7 +1787,7 @@ if (JumpReg == LoReg) continue; BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg) - .addReg(HiReg, RegState::Undef) + .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef) .add(predOps(ARMCC::AL)); --HiReg; } @@ -1791,19 +1805,21 @@ if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) { int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4; BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg) - .addReg(ARM::R8) + .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef) .add(predOps(ARMCC::AL)); BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)) .add(predOps(ARMCC::AL)) - .addReg(LoReg); + .addReg(LoReg, RegState::Kill); } } else { // push Lo and Hi registers with a single instruction MachineInstrBuilder PushMIB = BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP) .addReg(ARM::SP) .add(predOps(ARMCC::AL)); - for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) - PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0); + for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) { + PushMIB.addReg( + Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef); + } } } @@ -1934,7 +1950,18 @@ case ARM::tBLXNS_CALL: { DebugLoc DL = MBBI->getDebugLoc(); unsigned JumpReg = MBBI->getOperand(0).getReg(); - CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, + + // Figure out which registers are live at the point immediately before the + // call. When we indiscriminately push a set of registers, the live + // registers are added as ordinary use operands, whereas dead registers + // are "undef". + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(MBB); + for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse())) + LiveRegs.stepBackward(MI); + LiveRegs.stepBackward(*MBBI); + + CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs, AFI->isThumb1OnlyFunction()); SmallVector ClearRegs; @@ -1971,7 +1998,7 @@ .add(predOps(ARMCC::AL)); } - CMSESaveClearFPRegs(MBB, MBBI, DL, + CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs, ClearRegs); // save+clear FP regs with ClearRegs CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg); diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -287,7 +287,6 @@ //===----------------------------------------------------------------------===// // Lazy load / store multiple Instructions // -let mayLoad = 1 in def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone, NoItinerary, "vlldm${p}\t$Rn", "", []>, Requires<[HasV8MMainline, Has8MSecExt]> { @@ -298,9 +297,9 @@ let Inst{15-12} = 0; let Inst{7-0} = 0; let mayLoad = 1; + let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV]; } -let mayStore = 1 in def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone, NoItinerary, "vlstm${p}\t$Rn", "", []>, Requires<[HasV8MMainline, Has8MSecExt]> { diff --git a/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard %s -o - | \ +; RUN: FileCheck %s + +@g = hidden local_unnamed_addr global float (...)* null, align 4 +@a = hidden local_unnamed_addr global float 0.000000e+00, align 4 + +define hidden void @f() local_unnamed_addr #0 { +entry: + %0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4 + %call = tail call nnan ninf nsz float %0() #1 + store float %call, float* @a, align 4 + ret void +} + +; CHECK: blxns r{{[0-9]+}} +; CHECK: vmov r[[T:[0-9]+]], s0 +; CHECK: vlldm sp +; CHECK: vmov s0, r[[T]] + +attributes #0 = { nounwind } +attributes #1 = { nounwind "cmse_nonsecure_call" } diff --git a/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir @@ -0,0 +1,112 @@ +# RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard --run-pass=arm-pseudo %s -o - | \ +# RUN: FileCheck %s +--- | + ; ModuleID = 'cmse-vlldm-no-reorder.ll' + source_filename = "cmse-vlldm-no-reorder.ll" + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8m.main" + + @g = hidden local_unnamed_addr global float (...)* null, align 4 + @a = hidden local_unnamed_addr global float 0.000000e+00, align 4 + + ; Function Attrs: nounwind + define hidden void @f() local_unnamed_addr #0 { + entry: + %0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4 + %call = tail call nnan ninf nsz float %0() #2 + store float %call, float* @a, align 4 + ret void + } + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { nounwind "target-cpu"="cortex-m33" } + attributes #1 = { nounwind } + attributes #2 = { nounwind "cmse_nonsecure_call" } + +... +--- +name: f +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $r7, $lr + + $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $r0 = t2MOVi32imm @g + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from `float ()** bitcast (float (...)** @g to float ()**)`) + tBLXNS_CALL killed renamable $r0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0 + renamable $r0 = t2MOVi32imm @a + VSTRS killed renamable $s0, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into @a) + $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc + +... + +# CHECK-LABEL: bb.0.entry: +# CHECK: $sp = t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4, $r5, $r6, undef $r7, $r8, $r9, $r10, $r11 +# CHECK-NEXT: $r0 = t2BICri $r0, 1, 14 /* CC::al */, $noreg, $noreg +# CHECK-NEXT: $sp = tSUBspi $sp, 34, 14 /* CC::al */, $noreg +# CHECK-NEXT: VLSTM $sp, 14 /* CC::al */, $noreg, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $q0, implicit undef $q1, implicit undef $q2, implicit undef $q3, implicit undef $q4, implicit undef $q5, implicit undef $q6, implicit undef $q7 +# CHECK-NEXT: $r1 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r2 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r4 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r5 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r6 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r7 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r8 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r9 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r10 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r11 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg +# CHECK-NEXT: t2MSR_M 3072, $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr +# CHECK-NEXT: tBLXNSr 14 /* CC::al */, $noreg, killed $r0, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0 +# CHECK-NEXT: $r12 = VMOVRS $s0, 14 /* CC::al */, $noreg +# CHECK-NEXT: VLLDM $sp, 14 /* CC::al */, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv +# CHECK-NEXT: $s0 = VMOVSR $r12, 14 /* CC::al */, $noreg +# CHECK-NEXT: $sp = tADDspi $sp, 34, 14 /* CC::al */, $noreg +# CHECK-NEXT: $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11 + \ No newline at end of file diff --git a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir --- a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir +++ b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir @@ -62,7 +62,7 @@ $sp = tSUBspi $sp, 34, 14, $noreg VLSTM $sp, 14, $noreg tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp - VLLDM $sp, 14, $noreg + VLLDM $sp, 14, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv $sp = tADDspi $sp, 34, 14, $noreg $sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11 $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc